LoginSignup
0
0

More than 5 years have passed since last update.

mnistデータセットをWEB上に持ち込む

Last updated at Posted at 2018-01-11

概要

mnistデータセットをWEB上に持ち込んでみた。

写真

取得

import _pickle as cPickle
import gzip
import os
import urllib.request

# Download (if needed) and load the pickled MNIST archive, then print the
# shape of every split as a sanity check.
url_base = 'http://deeplearning.net/data/mnist/'
file_name = "mnist.pkl.gz"
# The archive is stored next to this script.
dataset_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(dataset_dir, file_name)

# Fetch the archive only when it is not already on disk, so that re-running
# the script does not download it again.
if not os.path.exists(file_path):
    print("Downloading " + file_name + " ... ")
    urllib.request.urlretrieve(url_base + file_name, file_path)
    print("Done")

# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file.
# Only load an archive obtained from a source you trust.
# encoding='latin1' is presumably needed because the archive was pickled
# under Python 2 -- TODO confirm.
with gzip.open(file_path, 'rb') as f:
    train_set, valid_set, test_set = cPickle.load(f, encoding='latin1')

# Each split is an (images, labels) pair; print both shapes per split.
for images, labels in (train_set, valid_set, test_set):
    print(images.shape)
    print(labels.shape)
Downloading mnist.pkl.gz ... 
Done
(50000, 784)
(50000,)
(10000, 784)
(10000,)
(10000, 784)
(10000,)

加工

import _pickle as cPickle
import gzip
import numpy
from scipy.misc import imsave
import os

# Convert the pickled MNIST archive into assets a browser can load:
# 21 PNG files (one digit per pixel row) and a JavaScript file with the labels.
file_name = "mnist.pkl.gz"
dataset_dir = os.path.dirname(os.path.abspath(__file__))
file_path = dataset_dir + "/" + file_name

# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file.
# Only load an archive obtained from a source you trust.
with gzip.open(file_path, 'rb') as f:
    train_set, valid_set, test_set = cPickle.load(f, encoding='latin1')

# Stack all training rows, all validation rows and the first 3000 test rows,
# scaling the pixel values by 255 before they are written as images.
x = numpy.concatenate((train_set[0] * 255, valid_set[0] * 255, test_set[0][:3000, :] * 255))

# NOTE(review): scipy.misc.imsave was removed in SciPy 1.2; this script needs
# an older SciPy or a replacement image writer -- confirm before running.
# Batches 0..19 hold 3000 rows each; every row of a PNG is one flattened digit.
for i in range(20):
    imsave(dataset_dir + "/" + 'mnist_batch_' + str(i) + '.png', x[3000 * i : 3000 * (i + 1), :])
    print(i)
# Batch 20 receives whatever rows remain after the first 60000.
imsave(dataset_dir + "/" + 'mnist_batch_' + str(20) + '.png', x[60000:, :])

# Labels must line up one-to-one with the rows of x, so the test labels are
# cut to the same first 3000 samples that were used for the images.
# tolist() converts to plain Python ints; str() of a list of NumPy scalars can
# render as "np.int64(5)" on recent NumPy, which is not valid JavaScript.
all_labels = numpy.concatenate((train_set[1], valid_set[1], test_set[1][:3000]))
L = 'var labels=' + str(all_labels.tolist()) + ';\n'
with open(dataset_dir + "/" + 'mnist_labels.js', 'w') as labels_file:
    labels_file.write(L)
print("Done!")

利用

// Hosted location of every batch image, keyed by batch number ('0' .. '20').
var url = (function () {
    var paths = [
        '/assets/A/j/W/3/AjW3t.png',
        '/assets/e/l/w/h/elwhh.png',
        '/assets/Y/0/k/e/Y0kel.png',
        '/assets/8/O/x/F/8OxFx.png',
        '/assets/q/P/x/6/qPx60.png',
        '/assets/6/j/8/t/6j8tg.png',
        '/assets/S/H/y/u/SHyuZ.png',
        '/assets/q/Z/G/o/qZGoq.png',
        '/assets/o/D/k/E/oDkE9.png',
        '/assets/2/N/L/G/2NLGe.png',
        '/assets/Y/b/w/I/YbwIP.png',
        '/assets/w/e/S/9/weS9c.png',
        '/assets/K/9/6/V/K96Ve.png',
        '/assets/G/2/Y/x/G2Yxd.png',
        '/assets/K/X/8/d/KX8dt.png',
        '/assets/e/8/E/6/e8E6f.png',
        '/assets/S/N/1/Z/SN1Z2.png',
        '/assets/C/5/G/G/C5GGt.png',
        '/assets/s/8/n/V/s8nVW.png',
        '/assets/2/E/a/d/2EadT.png',
        '/assets/K/c/m/W/KcmWd.png'
    ];
    var table = {};
    for (var n = 0; n < paths.length; n++) {
        table[String(n)] = paths[n];
    }
    return table;
})();

// Number of batch images listed above.
var num_batches = 21;

// Per-batch state, all indexed by batch number.
var data_img_elts = new Array(num_batches);   // Image element used to fetch the batch
var img_data = new Array(num_batches);        // decoded pixel data of the batch
var loaded = new Array(num_batches);          // whether the batch has finished loading
// Batch numbers below 20 that have finished loading, in completion order.
var loaded_train_batches = [];

// The on-page canvas the digits and their labels are painted onto.
var canvas = document.getElementById('canvas');
var ctx = canvas.getContext('2d');
// Text settings used when the labels are written.
ctx.font = 'bold 15pt Meiryo';
ctx.textAlign = 'center';
ctx.textBaseline = 'middle';
/**
 * Paint the first 120 digits of a loaded batch onto the page canvas as a
 * grid with 12 digits per row, writing each digit's label underneath it.
 *
 * Relies on the globals `img_data` (decoded batch pixels), `labels`
 * (label list from mnist_labels.js) and `ctx` (2D context of the canvas).
 *
 * @param {number} batch_num  Index of the batch to draw; its entry in
 *                            `img_data` must already be populated.
 */
function draw(batch_num) {
    var pixels = img_data[batch_num].data;
    // Every batch image written by the conversion script holds 3000 digits,
    // so the labels of batch b start at index b * 3000 in `labels`.
    var rows_per_batch = 3000;
    var label_offset = batch_num * rows_per_batch;

    for (var i = 0; i < 120; i++)
    {
        // A 28x28 RGBA buffer for one digit; no scratch canvas is needed
        // because the page context can create the buffer itself.
        var g = ctx.createImageData(28, 28);
        for (var j = 0; j < 784; j++)
        {
            var pp = j * 4;
            // Digit i occupies pixel row i of the batch image (784 pixels,
            // 4 bytes each); only the first channel of each pixel is read.
            var d = pixels[i * 784 * 4 + pp];
            // Copy the grey level into R, G and B, fully opaque.
            g.data[pp] = d;
            g.data[pp + 1] = d;
            g.data[pp + 2] = d;
            g.data[pp + 3] = 255;
        }
        // Cell position: 30px column pitch, 50px row pitch.
        var x = (i % 12) * 30;
        var y = Math.floor(i / 12) * 50;
        ctx.fillText(labels[label_offset + i], x + 10, y + 40);
        ctx.putImageData(g, x, y);
    }
}
/**
 * Start fetching the image of one batch. When the image has loaded, its
 * pixels are decoded into `img_data[batch_num]`, the batch is flagged in
 * `loaded`, batch numbers below 20 are appended to `loaded_train_batches`,
 * and the batch is drawn.
 *
 * @param {number} batch_num  Index of the batch; used as the key into `url`.
 */
function load_data_batch(batch_num) {
    var img = new Image();
    data_img_elts[batch_num] = img;

    img.onload = function () {
        // Draw the image onto an off-screen canvas of the same size so that
        // its raw pixel values can be read back.
        var scratch = document.createElement('canvas');
        var scratch_ctx = scratch.getContext("2d");
        scratch.width = img.width;
        scratch.height = img.height;
        scratch_ctx.drawImage(img, 0, 0);

        img_data[batch_num] = scratch_ctx.getImageData(0, 0, scratch.width, scratch.height);
        loaded[batch_num] = true;
        if (batch_num < 20) {
            loaded_train_batches.push(batch_num);
        }
        draw(batch_num);
    };

    // Assign the source only after the handler is attached.
    img.src = url[batch_num];
}
// Nothing has been fetched yet: flag every batch as not loaded.
var k = 0;
while (k < loaded.length) {
    loaded[k] = false;
    k += 1;
}
// Fetch batch 0; it is drawn once its image has finished loading.
load_data_batch(0);

成果物

以上。

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0