More than 5 years have passed since last update.

国土地理院のデータを使ってみる。

Posted at 2019-06-01

pythonの勉強用に何か作ろうと思った時に作ったものです。

国土地理院のデータは以下のURLからDLできるものを使用しました。
http://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-N03-v2_3.html

環境は、Jupyterです。

以下コード。

from PIL import Image,ImageDraw,ImageFont,ImageOps
import matplotlib.pyplot as plt
import numpy as np
import re
import codecs                            #UTF-8でオープンするために必要


files = [
         './N03-190101_08_GML/N03-19_08_190101.xml',      #  茨城
         './N03-190101_09_GML/N03-19_09_190101.xml',      #  栃木
         './N03-190101_10_GML/N03-19_10_190101.xml',      #  群馬
         './N03-190101_11_GML/N03-19_11_190101.xml',      #  埼玉
         './N03-190101_12_GML/N03-19_12_190101.xml',      #  千葉
         './N03-190101_13_GML/N03-19_13_190101.xml',      #  東京
         './N03-190101_14_GML/N03-19_14_190101.xml',      #  神奈川
         './N03-190101_19_GML/N03-19_19_190101.xml',      #  山梨
         './N03-190101_22_GML/N03-19_22_190101.xml',      #  静岡
        ]


#取得するデータ情報
# 本来はXML形式なのでちゃんとパースして使うべきなのでしょうが、今回は市町村の境界をプロットできれば良い。
tag_Position = '\t\t\t\t([0-9.]+) ([0-9.]+)'

tag_start_token ='<gml:posList>'
tag_end_token ='</gml:posList>'
tag_in_flag = False

tag_Surface_start_token = '<gml:Surface gml:id=\"sf([0-9.]+)\">'


#縮尺
Scale = 150000 #* 4

poss = []
file_count = 0

tag_end_count = 0
#ファイル開いて
for filename in files:
    print( filename )
    
    test_data = codecs.open(filename,"r",'utf-8')
    # 行ごとにすべて読み込んでリストデータにする
    lines = test_data.readlines()

    # 一行ずつ処理する
    for line in lines:
        tag = re.search(tag_start_token , line)
        if(tag):
            tag_in_flag = True
        else:
            tag = re.search(tag_end_token , line)
            if(tag):
                tag_in_flag = False
                tag_end_count =  tag_end_count + 1
            
        
        pos = re.search(tag_Position , line)
        #正規表現でヒットしたので境界座標と判断
        if(pos):
            if(tag_in_flag==True):
                #小数点の演算がメンドクサイのでここで、"."を外して正数化
                #タグ情報を配列に
                if( int(pos.group(1).split('.')[0]) > 33):  # 東京都が大きすぎるので３３度以下は対象外にする
                    poss.append( [ int(pos.group(1).replace('.', '')), int(pos.group(2).replace('.', '') ), file_count ] )

    # ファイルをクローズする
    test_data.close()
    #処理済みファイルカウンタをインクリメント
    file_count = file_count + 1

print('tag_end_count = ',tag_end_count)
#イメージにしやすいように係数化する。
nparr = np.array(poss)
sub =  np.min(nparr, axis=0)

#平均値が入っている配列を作成
np_sub = np.full((nparr.shape[0], 3),[ int(sub[0]), int(sub[1]), 0 ] )

#最大と対象から値の幅を算出
npimg_temp  = nparr - np_sub

#縮小
npimg_temp  = npimg_temp / [Scale,Scale,1]  #縮小

#整数化
npimg = np.array(npimg_temp, dtype='int')

#イメージとして書いてみる
#  画像のサイズを算出
print("min",np.min(npimg, axis=0))
print("max",np.max(npimg, axis=0))
img_size = np.max(npimg, axis=0) - np.min(npimg, axis=0)

space = 10
space_half = int(space / 2)

images = []

plot_count = 0;

#  ベース画像
img2 = Image.new('RGB',(img_size[1]+space,img_size[0]+space),(255,255,255))

point_cont = len(npimg)
for xy in npimg:
    temp_color = 60
    img2.putpixel((xy[1] + space_half,xy[0] + space_half), 
                  (int((xy[2] % 3)* temp_color),
                   int((xy[2] / 3)* temp_color),
                   int((xy[2] % 3)* temp_color)))
    plot_count += 1
    if(plot_count > point_cont/ 200 ):
        images.append(ImageOps.flip(img2))    
        plot_count = 0
    
img2 = ImageOps.flip(img2)
img2.save('test.png', quality=95)
plt.imshow( img2 )
images.append(img2)
images[0].save('pillow_imagedraw.gif',
               save_all=True, append_images=images[1:], optimize=False, duration=40, loop=0)

./N03-190101_08_GML/N03-19_08_190101.xml
./N03-190101_09_GML/N03-19_09_190101.xml
./N03-190101_10_GML/N03-19_10_190101.xml
./N03-190101_11_GML/N03-19_11_190101.xml
./N03-190101_12_GML/N03-19_12_190101.xml
./N03-190101_13_GML/N03-19_13_190101.xml
./N03-190101_14_GML/N03-19_14_190101.xml
./N03-190101_19_GML/N03-19_19_190101.xml
./N03-190101_22_GML/N03-19_22_190101.xml
tag_end_count = 11225
min [0 0 0]
max [2074 2271 8]

上のコードでは、画像へのプロッティング処理中の画像を使ってGIFを作成しています。

今回は、座標データしか使っていませんが、いろんなデータが含まれるので色々と活用できそうですね。

もちろん、著作権とかへの考慮は必要ですが。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up