今回利用したPyTorchライブラリ
【 結果 】 入力した画像と、出力された画像
( 入力画像 )
( 出力画像 )
- ファイル保存なし
- plt.show()でウィンドウ出力のみ
単眼カメラのレンズに近い物体ほど、明るい黄色で表現されている (正常動作)。
手元に画像ファイル(jpg, jpeg, png)を用意。
Terminal
electron@diynoMacBook-Pro catr % cd image_files
electron@diynoMacBook-Pro image_files % ls
special.jp_解析結果.jpeg test002.jpg test005.jp_解析結果.jpeg test008.jpe_解析結果.jpeg
special.jpg test003.jpg test005.jpg test008.jpeg
test001.png test004.jpg test007.jpeg test010.jpg
electron@diynoMacBook-Pro image_files %
公式サイトのサンプルコードを写経して実行
Terminal
electron@diynoMacBook-Pro image_files % pip3 install timm
Terminal
electron@diynoMacBook-Pro image_files % python3
Python 3.9.6 (default, Jun 29 2021, 06:20:32)
[Clang 12.0.0 (clang-1200.0.32.29)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import cv2
>>> import torch
>>> import urllib.request
>>> import matplotlib.pyplot as plt
>>>
>>> url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
>>> urllib.request.urlretrieve(url, filename)
('dog.jpg', <http.client.HTTPMessage object at 0x13352a430>)
>>>
>>> model_type = "DPT_Large"
>>> midas = torch.hub.load("intel-isl/MiDaS", model_type)
Downloading: "https://github.com/intel-isl/MiDaS/archive/master.zip" to /Users/electron/.cache/torch/hub/master.zip
Downloading: "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt" to /Users/electron/.cache/torch/hub/checkpoints/dpt_large-midas-2f21e586.pt
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.28G/1.28G [00:31<00:00, 44.1MB/s]
>>>
>>> device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
>>> print(device)
cpu
>>>
>>> midas.to(device)
DPTDepthModel(
(pretrained): Module(
(model): VisionTransformer(
(patch_embed): PatchEmbed(
(proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
)
(pos_drop): Dropout(p=0.0, inplace=False)
(blocks): ModuleList(
(0): Block(
(norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(qkv): Linear(in_features=1024, out_features=3072, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=1024, out_features=1024, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
( 省略 )
(skip_add): FloatFunctional(
(activation_post_process): Identity()
)
)
(output_conv): Sequential(
(0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Interpolate()
(2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): Conv2d(32, 1, kernel_size=(1, 1), stride=(1, 1))
(5): ReLU(inplace=True)
(6): Identity()
)
)
)
>>>
>>> midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
Using cache found in /Users/electron/.cache/torch/hub/intel-isl_MiDaS_master
>>>
>>> if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
...     transform = midas_transforms.dpt_transform
... else:
...     transform = midas_transforms.small_transform
...
>>>
>>> print(transform)
Compose(
<function transforms.<locals>.<lambda> at 0x13cdd5160>
<midas.transforms.Resize object at 0x13ce46cd0>
<midas.transforms.NormalizeImage object at 0x13ce46ca0>
<midas.transforms.PrepareForNet object at 0x13ce46c40>
<function transforms.<locals>.<lambda> at 0x13cdd5310>
)
>>>
>>> img = cv2.imread("test007.jpeg")
>>> img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
>>> input_batch = transform(img).to(device)
>>>
>>> with torch.no_grad():
...     prediction = midas(input_batch)
...     prediction = torch.nn.functional.interpolate(
...         prediction.unsqueeze(1),
...         size=img.shape[:2],
...         mode="bicubic",
...         align_corners=False,
...     ).squeeze()
...
/usr/local/lib/python3.9/site-packages/torch/nn/functional.py:3451: UserWarning: Default upsampling behavior when mode=bilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
warnings.warn(
>>>
>>> output = prediction.cpu().numpy()
>>>
>>> import types
>>> print(type(output))
<class 'numpy.ndarray'>
>>>
>>> plt.imshow(output)
<matplotlib.image.AxesImage object at 0x13ce6fd60>
>>>
>>> plt.show()
>>> quit()
electron@diynoMacBook-Pro image_files %