0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

EVO-X2でDepth Anything 3やーる(Windows11、Python3.12)

Posted at

はじめに

EVO-X2でDepth Anything 3を動かしてみました

開発環境

導入

下記を参考に、Pythonプログラムを作成

da3.py
import torch
import subprocess
import sys
import os
import time
import glob
import shutil
from datetime import datetime
# NOTE(review): shutil and datetime appear unused in this script — candidates for removal.

# Refuse to run on interpreters too old for the dependencies below.
if sys.version_info < (3, 8):
    print(f"❌ Python 3.8以降が必要です。現在のバージョン: {sys.version}")
    sys.exit(1)

print(f"✅ Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")


def _importable(module_name):
    """Return True when *module_name* imports cleanly in this environment."""
    try:
        __import__(module_name)
    except ImportError:
        return False
    return True


# Map of importable module name -> pip distribution name.
required_packages = {
    'depth_anything_3': 'awesome-depth-anything-3',
    'addict': 'addict',
    'matplotlib': 'matplotlib',
    'PIL': 'pillow',
    'numpy': 'numpy',
    'torch': 'torch',
}

# Collect the pip names of every module that failed to import.
missing_packages = [pkg for mod, pkg in required_packages.items() if not _importable(mod)]

if missing_packages:
    print(f"📦 Installing missing packages: {', '.join(missing_packages)}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install"] + missing_packages)

# Import the (possibly just-installed) third-party dependencies.
from depth_anything_3.api import DepthAnything3
from depth_anything_3.utils.export.glb import export_to_glb
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import urllib.request

# ============================================================================
# 1. Device selection
# ============================================================================
# Prefer CUDA when available; otherwise fall back to the (slower) CPU path.
if torch.cuda.is_available():
    device = "cuda"
    gpu_name = torch.cuda.get_device_name(0)
    vram = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"✅ GPU detected: {gpu_name} ({vram:.1f} GB VRAM)")
else:
    device = "cpu"
    gpu_name = "None"
    vram = 0
    print("⚠️ No GPU detected. Using CPU (will be slower)")

# ============================================================================
# 2. Model loading
# ============================================================================
# Options: "DA3-SMALL", "DA3-BASE", "DA3-LARGE", "DA3-GIANT", "DA3NESTED-GIANT-LARGE"
model_size = "DA3-LARGE"

print(f"\n📥 Loading {model_size}...")
# Reference table of model variants (speed / quality / VRAM trade-off).
for table_row in (
    "| Model | Speed | Quality | VRAM |",
    "|-------|-------|---------|------|",
    "| SMALL | ⚡⚡⚡ | ★★☆ | 4GB |",
    "| BASE | ⚡⚡ | ★★★ | 6GB |",
    "| LARGE | ⚡ | ★★★★ | 8GB |",
    "| GIANT | 🐢 | ★★★★★ | 12GB |",
    "| NESTED | 🐢 | ★★★★★+ | 16GB |",
):
    print(table_row)

start = time.time()
model = DepthAnything3.from_pretrained(f"depth-anything/{model_size}")
model = model.to(device).eval()
print(f"✅ Model loaded in {time.time()-start:.1f}s")

# ============================================================================
# 3. Smoke-test the model on a single downloaded sample image
# ============================================================================
print("\n🖼️ Testing with sample image...")

os.makedirs("samples", exist_ok=True)
url = "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?w=1280"
sample_path = "samples/mountain.jpg"

# Download the sample only once; reuse it on subsequent runs.
if not os.path.exists(sample_path):
    print("📥 Downloading sample image...")
    urllib.request.urlretrieve(url, sample_path)

# Run single-image inference.
result = model.inference([sample_path])

# Side-by-side figure: input photo on the left, predicted depth on the right.
fig, (ax_input, ax_depth) = plt.subplots(1, 2, figsize=(14, 5))

ax_input.imshow(result.processed_images[0])
ax_input.set_title("📸 Input", fontsize=14, fontweight='bold')
ax_input.axis("off")

depth = result.depth[0]
im = ax_depth.imshow(depth, cmap='Spectral_r')
ax_depth.set_title(f"🌊 Depth (range: {depth.min():.1f}m - {depth.max():.1f}m)", fontsize=14, fontweight='bold')
ax_depth.axis("off")
plt.colorbar(im, ax=ax_depth, fraction=0.046, pad=0.04, label='Depth (m)')

plt.tight_layout()
plt.savefig("samples/depth_result.png", dpi=150, bbox_inches='tight')
print("💾 Saved visualization to samples/depth_result.png")
plt.close()

print(f"\n📊 Output shapes:")
print(f"   Depth: {result.depth.shape}")
print(f"   Confidence: {result.conf.shape}")
print(f"   Camera intrinsics: {result.intrinsics.shape}")

# ============================================================================
# 4. Discover the user's own images and save a preview grid
# ============================================================================
print("\n" + "="*60)
print("📤 4. Use Your Own Images")
print("="*60)

# Directory that holds the user's input images (change as needed).
upload_dir = "my_images"

print(f"\n💡 Tips for best results:")
print("   - Move the camera, not the objects")
print("   - 30-50% overlap between consecutive images")
print("   - Avoid motion blur")
print("   - Good lighting helps!")

if not os.path.exists(upload_dir):
    print(f"\n❌ Directory '{upload_dir}' not found.")
    print(f"Please create the directory and add your images there.")
    print(f"   mkdir {upload_dir}")
    image_files = []
else:
    # Gather every supported image type, sorted so frame order is deterministic.
    image_files = []
    for pattern in ("*.jpg", "*.jpeg", "*.png", "*.webp"):
        image_files.extend(glob.glob(f"{upload_dir}/{pattern}"))
    image_files.sort()

    print(f"\n✅ Found {len(image_files)} images in '{upload_dir}'")

    # Preview grid of up to the first six images.
    if image_files:
        n_preview = min(6, len(image_files))
        fig, axes = plt.subplots(1, n_preview, figsize=(3*n_preview, 3))
        if n_preview == 1:
            axes = [axes]  # subplots returns a bare Axes for a single column
        for i, img_path in enumerate(image_files[:n_preview]):
            axes[i].imshow(Image.open(img_path))
            axes[i].set_title(f"#{i+1}", fontsize=10)
            axes[i].axis("off")
        if len(image_files) > n_preview:
            print(f"   (showing first {n_preview} of {len(image_files)})")
        plt.tight_layout()
        plt.savefig("samples/image_preview.png", dpi=150, bbox_inches='tight')
        plt.close()

# ============================================================================
# Multi-view 3D reconstruction and GLB export
# ============================================================================
# At least two overlapping views are required to reconstruct a scene.
if len(image_files) >= 2:
    print(f"\n🔄 Processing {len(image_files)} images...")
    start = time.time()

    result = model.inference(
        image_files,
        process_res_method="upper_bound_resize",
    )

    inference_time = time.time() - start
    print(f"✅ Inference done in {inference_time:.1f}s ({len(image_files)/inference_time:.1f} img/s)")

    # Export the reconstruction as a GLB point cloud.
    output_dir = "output_3d"
    os.makedirs(output_dir, exist_ok=True)

    print("📦 Generating 3D point cloud...")
    export_to_glb(
        result,
        export_dir=output_dir,
        show_cameras=True,
        conf_thresh_percentile=20,  # Filter low-confidence points
        num_max_points=500_000,
    )

    # List every exported artifact with its size.
    print(f"\n✅ 3D model saved to {output_dir}/")
    if os.path.exists(output_dir):
        files = os.listdir(output_dir)
        for f in files:
            filepath = os.path.join(output_dir, f)
            size = os.path.getsize(filepath) / (1024 * 1024)  # MB
            print(f"  📄 {f}: {size:.2f} MB")

    # BUG FIX: the previous code hard-coded "point_cloud.glb", but the exporter
    # actually wrote "scene.glb" (see the run log), so the success message was
    # never printed. Search for any .glb file instead of assuming a filename.
    glb_files = sorted(glob.glob(os.path.join(output_dir, "*.glb")))
    if glb_files:
        glb_file = glb_files[0]
        print(f"\n🎉 GLB file created!")
        print(f"📁 Location: {os.path.abspath(glb_file)}")
        print(f"\n👉 View your model: https://gltf-viewer.donmccurdy.com/")
elif len(image_files) == 1:
    print("\n⚠️ At least 2 images are needed for 3D reconstruction.")
else:
    print("\n⚠️ No images found. Skipping 3D reconstruction.")

# ============================================================================
# 5. Visualize multi-view results
# ============================================================================
# Run only when the multi-view inference above produced more than one depth
# map (the single sample-image result is skipped).  FIX: the old condition
# "len(...) > 0 and len(...) > 1" was redundant — "> 1" already implies "> 0".
if 'result' in locals() and len(result.depth) > 1:
    print("\n" + "="*60)
    print("📊 5. Visualize Results")
    print("="*60)

    # Depth-map grid: up to 4 columns, as many rows as needed (ceil division).
    n_images = len(result.depth)
    cols = min(4, n_images)
    rows = (n_images + cols - 1) // cols

    fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 4*rows))
    axes = np.array(axes).flatten() if n_images > 1 else [axes]

    for i in range(n_images):
        depth = result.depth[i]
        axes[i].imshow(depth, cmap='Spectral_r')
        axes[i].set_title(f"Frame {i+1}", fontsize=10)
        axes[i].axis("off")

    # Hide any unused subplots at the end of the grid.
    for i in range(n_images, len(axes)):
        axes[i].axis("off")

    plt.suptitle("🌊 Depth Maps", fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig("samples/all_depth_maps.png", dpi=150, bbox_inches='tight')
    print("💾 Saved depth maps to samples/all_depth_maps.png")
    plt.close()

    # Camera-pose visualization.
    if len(result.extrinsics) > 0:
        # Import registers the '3d' projection (needed on older matplotlib).
        from mpl_toolkits.mplot3d import Axes3D

        # Camera center recovered as C = -R^T t, which assumes each extrinsic
        # [R|t] is a world-to-camera transform — TODO confirm against DA3 docs.
        positions = []
        for ext in result.extrinsics:
            R = ext[:3, :3]
            t = ext[:3, 3]
            cam_pos = -R.T @ t
            positions.append(cam_pos)

        positions = np.array(positions)

        fig = plt.figure(figsize=(10, 8))
        ax = fig.add_subplot(111, projection='3d')

        # Color-code by frame index and connect the poses to show the path.
        ax.scatter(positions[:, 0], positions[:, 1], positions[:, 2],
                   c=range(len(positions)), cmap='viridis', s=100, marker='o')
        ax.plot(positions[:, 0], positions[:, 1], positions[:, 2],
                'b-', alpha=0.5, linewidth=1)
        ax.scatter(*positions[0], c='green', s=200, marker='^', label='First')
        ax.scatter(*positions[-1], c='red', s=200, marker='v', label='Last')

        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_zlabel('Z')
        ax.set_title('📷 Camera Trajectory', fontsize=14, fontweight='bold')
        ax.legend()

        plt.tight_layout()
        plt.savefig("samples/camera_trajectory.png", dpi=150, bbox_inches='tight')
        print("💾 Saved camera trajectory to samples/camera_trajectory.png")
        plt.close()

        print(f"📍 {len(positions)} camera poses estimated")

print("\n" + "="*60)
print("✅ Processing complete!")
print("="*60)

ライブラリのインストール

pip install awesome-depth-anything-3 matplotlib pillow numpy torch

画像を my_images/ フォルダに配置

mkdir my_images

000.png
000.png

010.png
010.png

実行

実行してみましょう

python da3.py
✅ Python 3.12.12
[INFO ] ModelCache initialized
📦 Installing missing packages: awesome-depth-anything-3, addict...
[WARN ] Dependency `gsplat` is required for rendering 3DGS. Install via: pip install git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70
⚠️ No GPU detected. Using CPU (will be slower)

📥 Loading DA3-LARGE...
| Model | Speed | Quality | VRAM |
|-------|-------|---------|------|
| SMALL | ⚡⚡⚡ | ★★☆ | 4GB |
| BASE | ⚡⚡ | ★★★ | 6GB |
| LARGE | ⚡ | ★★★★ | 8GB |
| GIANT | 🐢 | ★★★★★ | 12GB |
| NESTED | 🐢 | ★★★★★+ | 16GB |
config.json: 1.21kB [00:00, 1.21MB/s]
[INFO ] Model cache MISS: da3-large on cpu. Loading...
[INFO ] using MLP layer as FFN
[INFO ] Model cached: da3-large on cpu
[INFO ] Using standard InputProcessor (optimized CPU pipeline)
model.safetensors: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.64G/1.64G [01:29<00:00, 18.3MB/s]
✅ Model loaded in 92.7s

🖼️ Testing with sample image...
📥 Downloading sample image...
[INFO ] Processed Images Done taking 0.07385778427124023 seconds. Shape:  torch.Size([1, 3, 336, 504])
[INFO ] Model Forward Pass Done. Time: 5.336712837219238 seconds
[INFO ] Conversion to Prediction Done. Time: 0.0010001659393310547 seconds
💾 Saved visualization to samples/depth_result.png

📊 Output shapes:
   Depth: (1, 336, 504)
   Confidence: (1, 336, 504)
   Camera intrinsics: (1, 3, 3)

サンプル画像の深度推定結果

samples/depth_result.png

============================================================
📤 4. Use Your Own Images
============================================================

💡 Tips for best results:
   - Move the camera, not the objects
   - 30-50% overlap between consecutive images
   - Avoid motion blur
   - Good lighting helps!

✅ Found 2 images in 'my_images'

🔄 Processing 2 images...
[INFO ] Processed Images Done taking 0.02064800262451172 seconds. Shape:  torch.Size([2, 3, 280, 504])
[INFO ] Model Forward Pass Done. Time: 8.261384725570679 seconds
[INFO ] Conversion to Prediction Done. Time: 0.0 seconds
✅ Inference done in 8.3s (0.2 img/s)
📦 Generating 3D point cloud...
[INFO ] conf_thresh_percentile: 20
[INFO ] num max points: 500000
[INFO ] Exporting to GLB with num_max_points: 500000
'cp' は、内部コマンドまたは外部コマンド、
操作可能なプログラムまたはバッチ ファイルとして認識されていません。

✅ 3D model saved to output_3d/
  📄 depth_vis: 0.00 MB
  📄 scene.glb: 2.07 MB

============================================================
📊 5. Visualize Results
============================================================
💾 Saved depth maps to samples/all_depth_maps.png
💾 Saved camera trajectory to samples/camera_trajectory.png
📍 2 camera poses estimated

============================================================
✅ Processing complete!
============================================================

my_images/に配置した2枚の画像

20251210-002203-b17da085.png

深度推定結果
output_3d/depth_vis

0000.jpg
0000.jpg

0001.jpg
0001.jpg

samples/
samples/all_depth_maps.png

カメラの軌跡
samples/camera_trajectory.png

2枚の画像から3D再構築したモデル
output_3d/scene.glb

お疲れさまでした。

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?