pull request nerfstudio-project#536

umaaji298 · umaaji298 · commit 3bdf78591824 · 2025-09-18T19:39:38.000+09:00
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
@@ -1,3 +1,31 @@
+# Added near the top of the file (placing it at the very top is also fine)
+# import logging, unicodedata, os, difflib
+# logger = logging.getLogger("colmap_debug")
+# if not logger.handlers:
+#     h = logging.FileHandler("colmap_debug.txt", encoding="utf-8")
+#     fmt = logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
+#     h.setFormatter(fmt)
+#     logger.addHandler(h)
+#     logger.setLevel(logging.DEBUG)
+
+# def _norm_key(s: str) -> str:
+#     # Key normalization: absorb differences in separators, a leading "./", letter case, and Unicode form
+#     s = unicodedata.normalize("NFC", s.strip())
+#     s = s.replace("\\", "/")
+#     if s.startswith("./"):
+#         s = s[2:]
+#     return s.lower()
+
+# def _peek(head, n=5):
+#     head = list(head)
+#     return head[:n]
+
+# def _summarize_list(name, arr, n=5):
+#     logger.debug("%s: count=%d, sample=%s", name, len(arr), _peek(arr, n))
+# -----debug
+# No need to repeat imports that are already present
+
+from collections import defaultdict
 import json
 import os
 from typing import Any, Dict, List, Optional
@@ -6,8 +34,9 @@
 import imageio.v2 as imageio
 import numpy as np
 import torch
+
 from PIL import Image
-from pycolmap import SceneManager
+from pycolmap import Reconstruction, CameraModelId
 from tqdm import tqdm
 from typing_extensions import assert_never
 
@@ -18,6 +47,17 @@
     transform_points,
 )
 
+# path patch
+import unicodedata
+def _norm_key(s: str) -> str:
+    # Absorb differences in path separators, a leading "./", letter case, and Unicode normalization form
+    s = unicodedata.normalize("NFC", s.strip())
+    s = s.replace("\\", "/")
+    if s.startswith("./"):
+        s = s[2:]
+    return s.lower()
+# -----
+
 
 def _get_rel_paths(path_dir: str) -> List[str]:
     """Recursively get relative paths of files in a directory."""
@@ -75,25 +115,28 @@ def __init__(
             colmap_dir
         ), f"COLMAP directory {colmap_dir} does not exist."
 
-        manager = SceneManager(colmap_dir)
-        manager.load_cameras()
-        manager.load_images()
-        manager.load_points3D()
+        manager = Reconstruction(colmap_dir)
+
+        # point_id -> point3D_id_contiguous
+        point3D_id_contiguous = dict()
+        for i, point_id in enumerate(manager.points3D.keys()):
+            point3D_id_contiguous[point_id] = i
 
         # Extract extrinsic matrices in world-to-camera format.
         imdata = manager.images
         w2c_mats = []
         camera_ids = []
         Ks_dict = dict()
+        point_indices = defaultdict(list) # image_name -> [point_idx]        
         params_dict = dict()
         imsize_dict = dict()  # width, height
         mask_dict = dict()
         bottom = np.array([0, 0, 0, 1]).reshape(1, 4)
         for k in imdata:
             im = imdata[k]
-            rot = im.R()
-            trans = im.tvec.reshape(3, 1)
-            w2c = np.concatenate([np.concatenate([rot, trans], 1), bottom], axis=0)
+            w2c = im.cam_from_world().matrix()
+            w2c = np.concatenate([w2c, bottom], axis=0)
+
             w2c_mats.append(w2c)
 
             # support different camera intrinsics
@@ -102,30 +145,40 @@ def __init__(
 
             # camera intrinsics
             cam = manager.cameras[camera_id]
-            fx, fy, cx, cy = cam.fx, cam.fy, cam.cx, cam.cy
-            K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+
+            K = cam.calibration_matrix()
             K[:2, :] /= factor
             Ks_dict[camera_id] = K
 
+            # get image_name -> [point_idx] dict
+            for obs_point2d in im.get_observation_points2D():
+                point_indices[im.name].append(point3D_id_contiguous[obs_point2d.point3D_id])
+
             # Get distortion parameters.
-            type_ = cam.camera_type
-            if type_ == 0 or type_ == "SIMPLE_PINHOLE":
+            type_ = cam.model
+            # SIMPLE_PINHOLE:     f, cx, cy
+            if type_ == CameraModelId.SIMPLE_PINHOLE:
                 params = np.empty(0, dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 1 or type_ == "PINHOLE":
+            # PINHOLE:            fx, fy, cx, cy
+            elif type_ == CameraModelId.PINHOLE:
                 params = np.empty(0, dtype=np.float32)
                 camtype = "perspective"
-            if type_ == 2 or type_ == "SIMPLE_RADIAL":
-                params = np.array([cam.k1, 0.0, 0.0, 0.0], dtype=np.float32)
+            # SIMPLE_RADIAL:      f, cx, cy, k
+            if type_ == CameraModelId.SIMPLE_RADIAL:
+                params = np.array([cam.params[3], 0.0, 0.0, 0.0], dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 3 or type_ == "RADIAL":
-                params = np.array([cam.k1, cam.k2, 0.0, 0.0], dtype=np.float32)
+            # RADIAL:             f, cx, cy, k1, k2
+            elif type_ == CameraModelId.RADIAL:
+                params = np.array([cam.params[3], cam.params[4], 0.0, 0.0], dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 4 or type_ == "OPENCV":
-                params = np.array([cam.k1, cam.k2, cam.p1, cam.p2], dtype=np.float32)
+            # OPENCV:             fx, fy, cx, cy, k1, k2, p1, p2
+            elif type_ == CameraModelId.OPENCV:
+                params = np.array([cam.params[4], cam.params[5], cam.params[6], cam.params[7]], dtype=np.float32)
                 camtype = "perspective"
-            elif type_ == 5 or type_ == "OPENCV_FISHEYE":
-                params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32)
+            # OPENCV_FISHEYE:     fx, fy, cx, cy, k1, k2, k3, k4
+            elif type_ == CameraModelId.OPENCV_FISHEYE:
+                params = np.array([cam.params[4], cam.params[5], cam.params[6], cam.params[7]], dtype=np.float32)
                 camtype = "fisheye"
             assert (
                 camtype == "perspective" or camtype == "fisheye"
@@ -140,7 +193,7 @@ def __init__(
 
         if len(imdata) == 0:
             raise ValueError("No images found in COLMAP.")
-        if not (type_ == 0 or type_ == 1):
+        if not (type_ == CameraModelId.PINHOLE or type_ == CameraModelId.SIMPLE_PINHOLE):
             print("Warning: COLMAP Camera is not PINHOLE. Images have distortion.")
 
         w2c_mats = np.stack(w2c_mats, axis=0)
@@ -195,21 +248,27 @@ def __init__(
                 colmap_image_dir, image_dir + "_png", factor=factor
             )
             image_files = sorted(_get_rel_paths(image_dir))
-        colmap_to_image = dict(zip(colmap_files, image_files))
-        image_paths = [os.path.join(image_dir, colmap_to_image[f]) for f in image_names]
-
-        # 3D points and {image_name -> [point_idx]}
-        points = manager.points3D.astype(np.float32)
-        points_err = manager.point3D_errors.astype(np.float32)
-        points_rgb = manager.point3D_colors.astype(np.uint8)
-        point_indices = dict()
-
-        image_id_to_name = {v: k for k, v in manager.name_to_image_id.items()}
-        for point_id, data in manager.point3D_id_to_images.items():
-            for image_id, _ in data:
-                image_name = image_id_to_name[image_id]
-                point_idx = manager.point3D_id_to_point3D_idx[point_id]
-                point_indices.setdefault(image_name, []).append(point_idx)
+
+        # --- fix ---
+        # colmap_to_image = dict(zip(colmap_files, image_files))
+        # image_paths = [os.path.join(image_dir, colmap_to_image[f]) for f in image_names]
+        
+        # Keys are the normalized colmap_files entries; values are the on-disk relative paths, left unchanged
+        colmap_to_image = { _norm_key(k): v for k, v in zip(colmap_files, image_files) }
+
+        # Normalize the lookup side (COLMAP image names) too, then apply normpath to format the result for the host OS
+        image_paths = [
+            os.path.normpath(os.path.join(image_dir, colmap_to_image[_norm_key(f)]))
+            for f in image_names
+        # --- fix 
+]
+
+        # 3D points
+        points3D = manager.points3D.values()
+        points_err = np.array([p.error for p in points3D], dtype=np.float32)
+        points_rgb = np.array([p.color for p in points3D], dtype=np.uint8)
+        points = np.array([p.xyz for p in points3D], dtype=np.float32)
+        
         point_indices = {
             k: np.array(v).astype(np.int32) for k, v in point_indices.items()
         }
diff --git a/examples/requirements.txt b/examples/requirements.txt
@@ -1,7 +1,8 @@
 # assume torch is already installed
 
 # pycolmap for data parsing
-git+https://github.com/rmbrualla/pycolmap@cc7ea4b7301720ac29287dbe450952511b32125e
+# git+https://github.com/rmbrualla/pycolmap@cc7ea4b7301720ac29287dbe450952511b32125e
+pycolmap
 # (optional) nerfacc for torch version rasterization 
 # git+https://github.com/nerfstudio-project/nerfacc
 
@@ -20,5 +21,5 @@ tensorly
 pyyaml
 matplotlib
 git+https://github.com/rahul-goel/fused-ssim@328dc9836f513d00c4b5bc38fe30478b4435cbb5
-git+https://github.com/harry7557558/fused-bilagrid@90f9788e57d3545e3a033c1038bb9986549632fe
+#git+https://github.com/harry7557558/fused-bilagrid@90f9788e57d3545e3a033c1038bb9986549632fe
 splines