Overview
With sharding alone, availability becomes a problem, so sharding is sometimes combined with replication. As the figure below shows, if even one of the sharded servers fails, the whole cluster is effectively down.
So sharding and replication are combined as shown below:
shard_A, sharded to db1, is replicated to db2 and db3.
shard_B, sharded to db2, is replicated to db1 and db3.
shard_C, sharded to db3, is replicated to db1 and db2.
Anticipating further scale-out later, an Arbiter is added to each set from the start. The Arbiter holds no data and only contributes a vote, so each three-member set, and therefore the cluster, is expected to survive the failure of any single server.
If db2 fails and goes down, the cluster ends up in the following state:
For the shard_A replica set, db1 stays Primary.
For the shard_B replica set, an election is held and the shard_B Secondary on db3 is promoted to Primary, with the Arbiter providing the deciding vote.
For the shard_C replica set, db3 stays Primary.
Preparation
This is tested on four EC2 instances.
The MongoDB version is 3.2.9.
The hosts are core1, db1, db2, and db3.
core1 runs the config server and the mongos router.
wget http://downloads.mongodb.org/linux/mongodb-linux-x86_64-rhel62-3.2.9.tgz
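For reference, here is a compact summary of the layout that the rest of the article builds (the ports match the config files below, and the arbiter placement matches the replica set setup later on); this is purely documentation for the reader, nothing in the cluster reads it:
# Reference only: which role of which replica set ends up on which host:port.
topology = {
    "core1":   {"configsvr": "core1:27001", "mongos": "core1:27000"},
    "shard_A": {"primary": "db1:27011", "secondary": "db2:27011", "arbiter": "db3:27011"},
    "shard_B": {"primary": "db2:27012", "secondary": "db3:27012", "arbiter": "db1:27012"},
    "shard_C": {"primary": "db3:27013", "secondary": "db1:27013", "arbiter": "db2:27013"},
}
for name, members in sorted(topology.items()):
    print(name, members)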
Configuring and starting the config server and mongos
[root@core1 mongodb]# cat conf/config.conf
port = 27001
dbpath = /root/mongodb/data/configdb
fork = true
configsvr = true
logpath = /root/mongodb/logs/config.log
logappend = yes
smallfiles = true
[root@core1 mongodb]# cat conf/mongos.conf
port = 27000
configdb = core1:27001
chunkSize = 2
fork = true
logpath = /root/mongodb/logs/shard.log
logappend = yes
Start them:
[root@core1 mongodb]# ./bin/mongod -f conf/config.conf
[root@core1 mongodb]# ./bin/mongos -f conf/mongos.conf
[root@core1 mongodb]# pgrep -lf mongo
3259 ./bin/mongod -f conf/config.conf
3280 ./bin/mongos -f conf/mongos.conf
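Before moving on, it does not hurt to confirm that mongos answers. A minimal sketch with pymongo (assuming Python and pymongo are available on core1; pymongo is used again later in this article):
from pymongo import MongoClient

# Ping the mongos router and the config server started above.
print(MongoClient("core1", 27000).admin.command("ping"))   # {'ok': 1.0}
print(MongoClient("core1", 27001).admin.command("ping"))   # {'ok': 1.0}
# serverStatus on the router should report the process as "mongos".
print(MongoClient("core1", 27000).admin.command("serverStatus")["process"])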
Configuring and starting mongod (db1, db2, db3)
db1
[root@db1 mongodb]# cat conf/rs_shard_A.conf
port = 27011
dbpath = /root/mongodb/data/rs_shard_A
fork = true
shardsvr = true
replSet = shard_A
logpath = /root/mongodb/logs/rs_shard_A.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db1 mongodb]# cat conf/rs_shard_B.conf
port = 27012
dbpath = /root/mongodb/data/rs_shard_B
fork = true
shardsvr = true
replSet = shard_B
logpath = /root/mongodb/logs/rs_shard_B.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db1 mongodb]# cat conf/rs_shard_C.conf
port = 27013
dbpath = /root/mongodb/data/rs_shard_C
fork = true
shardsvr = true
replSet = shard_C
logpath = /root/mongodb/logs/rs_shard_C.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db1 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf
[root@db1 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf
[root@db1 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf
[root@db1 mongodb]# pgrep -lf mongo
3180 ./bin/mongod -f conf/rs_shard_A.conf
3198 ./bin/mongod -f conf/rs_shard_B.conf
3216 ./bin/mongod -f conf/rs_shard_C.conf
db2
[root@db2 mongodb]# cat conf/rs_shard_A.conf
port = 27011
dbpath = /root/mongodb/data/rs_shard_A
fork = true
shardsvr = true
replSet = shard_A
logpath = /root/mongodb/logs/rs_shard_A.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db2 mongodb]# cat conf/rs_shard_B.conf
port = 27012
dbpath = /root/mongodb/data/rs_shard_B
fork = true
shardsvr = true
replSet = shard_B
logpath = /root/mongodb/logs/rs_shard_B.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db2 mongodb]# cat conf/rs_shard_C.conf
port = 27013
dbpath = /root/mongodb/data/rs_shard_C
fork = true
shardsvr = true
replSet = shard_C
logpath = /root/mongodb/logs/rs_shard_C.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf
[root@db2 mongodb]# pgrep -lf mongo
3046 ./bin/mongod -f conf/rs_shard_A.conf
3064 ./bin/mongod -f conf/rs_shard_B.conf
3082 ./bin/mongod -f conf/rs_shard_C.conf
db3
[root@db3 mongodb]# cat conf/rs_shard_A.conf
port = 27011
dbpath = /root/mongodb/data/rs_shard_A
fork = true
shardsvr = true
replSet = shard_A
logpath = /root/mongodb/logs/rs_shard_A.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db3 mongodb]# cat conf/rs_shard_B.conf
port = 27012
dbpath = /root/mongodb/data/rs_shard_B
fork = true
shardsvr = true
replSet = shard_B
logpath = /root/mongodb/logs/rs_shard_B.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db3 mongodb]# cat conf/rs_shard_C.conf
port = 27013
dbpath = /root/mongodb/data/rs_shard_C
fork = true
shardsvr = true
replSet = shard_C
logpath = /root/mongodb/logs/rs_shard_C.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db3 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf
[root@db3 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf
[root@db3 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf
[root@db3 mongodb]# pgrep -lf mongo
3120 ./bin/mongod -f conf/rs_shard_A.conf
3138 ./bin/mongod -f conf/rs_shard_B.conf
3156 ./bin/mongod -f conf/rs_shard_C.conf
Configuring the replica sets
Run on db1
[root@db1 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
> rs.status()
{
"info" : "run rs.initiate(...) if not yet done for the set",
"ok" : 0,
"errmsg" : "no replset config has been received",
"code" : 94
}
> rs.initiate()
{
"info2" : "no configuration specified. Using a default configuration for the set",
"me" : "db1:27011",
"ok" : 1
}
shard_A:OTHER> rs.add("db2:27011")
{ "ok" : 1 }
shard_A:PRIMARY> rs.addArb("db3:27011")
{ "ok" : 1 }
shard_A:PRIMARY> var config = rs.config(); config.members[0].priority=2; rs.reconfig(config)
{ "ok" : 1 }
shard_A:PRIMARY> rs.status()
{
"set" : "shard_A",
"date" : ISODate("2017-01-29T22:46:24.314Z"),
"myState" : 1,
"term" : NumberLong(1),
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "db1:27011",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 1381,
"optime" : {
"ts" : Timestamp(1485729778, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2017-01-29T22:42:58Z"),
"electionTime" : Timestamp(1485729680, 2),
"electionDate" : ISODate("2017-01-29T22:41:20Z"),
"configVersion" : 4,
"self" : true
},
{
"_id" : 1,
"name" : "db2:27011",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 243,
"optime" : {
"ts" : Timestamp(1485729778, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2017-01-29T22:42:58Z"),
"lastHeartbeat" : ISODate("2017-01-29T22:46:22.668Z"),
"lastHeartbeatRecv" : ISODate("2017-01-29T22:46:22.668Z"),
"pingMs" : NumberLong(0),
"syncingTo" : "db1:27011",
"configVersion" : 4
},
{
"_id" : 2,
"name" : "db3:27011",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 235,
"lastHeartbeat" : ISODate("2017-01-29T22:46:22.668Z"),
"lastHeartbeatRecv" : ISODate("2017-01-29T22:46:23.611Z"),
"pingMs" : NumberLong(0),
"configVersion" : 4
}
],
"ok" : 1
}
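By the way, the same shard_A layout can be declared in a single call with an explicit config, instead of rs.initiate() with a default config followed by rs.add(), rs.addArb() and a priority reconfig. This is not what was run here, just an equivalent sketch using pymongo against the not-yet-initiated db1:27011 (the shell equivalent would be rs.initiate() with the same document):
from pymongo import MongoClient

# Connect directly to the member that should become primary of shard_A.
client = MongoClient("db1", 27011)

config = {
    "_id": "shard_A",
    "members": [
        {"_id": 0, "host": "db1:27011", "priority": 2},     # preferred primary
        {"_id": 1, "host": "db2:27011"},
        {"_id": 2, "host": "db3:27011", "arbiterOnly": True},
    ],
}
print(client.admin.command("replSetInitiate", config))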
Run on db2
[root@db2 mongodb]# ./bin/mongo --port 27012
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
> rs.status()
{
"info" : "run rs.initiate(...) if not yet done for the set",
"ok" : 0,
"errmsg" : "no replset config has been received",
"code" : 94
}
> rs.initiate()
{
"info2" : "no configuration specified. Using a default configuration for the set",
"me" : "db2:27012",
"ok" : 1
}
shard_B:OTHER> rs.add("db3:27012")
{ "ok" : 1 }
shard_B:PRIMARY> rs.addArb("db1:27012")
{ "ok" : 1 }
shard_B:PRIMARY> var config = rs.config(); config.members[0].priority=2; rs.reconfig(config)
{ "ok" : 1 }
shard_B:PRIMARY> rs.status()
{
"set" : "shard_B",
"date" : ISODate("2017-01-29T22:45:17.677Z"),
"myState" : 1,
"term" : NumberLong(1),
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "db2:27012",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 1279,
"optime" : {
"ts" : Timestamp(1485729913, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2017-01-29T22:45:13Z"),
"infoMessage" : "could not find member to sync from",
"electionTime" : Timestamp(1485729859, 2),
"electionDate" : ISODate("2017-01-29T22:44:19Z"),
"configVersion" : 4,
"self" : true
},
{
"_id" : 1,
"name" : "db3:27012",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 21,
"optime" : {
"ts" : Timestamp(1485729913, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2017-01-29T22:45:13Z"),
"lastHeartbeat" : ISODate("2017-01-29T22:45:15.826Z"),
"lastHeartbeatRecv" : ISODate("2017-01-29T22:45:15.831Z"),
"pingMs" : NumberLong(0),
"syncingTo" : "db2:27012",
"configVersion" : 4
},
{
"_id" : 2,
"name" : "db1:27012",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 14,
"lastHeartbeat" : ISODate("2017-01-29T22:45:15.826Z"),
"lastHeartbeatRecv" : ISODate("2017-01-29T22:45:13.830Z"),
"pingMs" : NumberLong(0),
"configVersion" : 4
}
],
"ok" : 1
}
shard_B:PRIMARY>
Run on db3
[root@db3 mongodb]# ./bin/mongo --port 27013
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
> rs.status()
{
"info" : "run rs.initiate(...) if not yet done for the set",
"ok" : 0,
"errmsg" : "no replset config has been received",
"code" : 94
}
> rs.initiate()
{
"info2" : "no configuration specified. Using a default configuration for the set",
"me" : "db3:27013",
"ok" : 1
}
shard_C:OTHER> rs.add("db1:27013")
{ "ok" : 1 }
shard_C:PRIMARY> rs.addArb("db2:27013")
{ "ok" : 1 }
shard_C:PRIMARY> var config = rs.config(); config.members[0].priority=2; rs.reconfig(config)
{ "ok" : 1 }
shard_C:PRIMARY> rs.status()
{
"set" : "shard_C",
"date" : ISODate("2017-01-29T22:47:31.366Z"),
"myState" : 1,
"term" : NumberLong(1),
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "db3:27013",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 1380,
"optime" : {
"ts" : Timestamp(1485730047, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2017-01-29T22:47:27Z"),
"infoMessage" : "could not find member to sync from",
"electionTime" : Timestamp(1485730012, 2),
"electionDate" : ISODate("2017-01-29T22:46:52Z"),
"configVersion" : 4,
"self" : true
},
{
"_id" : 1,
"name" : "db1:27013",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 18,
"optime" : {
"ts" : Timestamp(1485730047, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2017-01-29T22:47:27Z"),
"lastHeartbeat" : ISODate("2017-01-29T22:47:31.233Z"),
"lastHeartbeatRecv" : ISODate("2017-01-29T22:47:31.238Z"),
"pingMs" : NumberLong(0),
"syncingTo" : "db3:27013",
"configVersion" : 4
},
{
"_id" : 2,
"name" : "db2:27013",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 11,
"lastHeartbeat" : ISODate("2017-01-29T22:47:31.233Z"),
"lastHeartbeatRecv" : ISODate("2017-01-29T22:47:27.237Z"),
"pingMs" : NumberLong(0),
"configVersion" : 4
}
],
"ok" : 1
}
shard_C:PRIMARY>
Configuring sharding
Run on core1. Arbiters hold no data, so they are not included in the host strings passed to sh.addShard().
mongos> sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
shards:
active mongoses:
"3.2.9" : 1
balancer:
Currently enabled: yes
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
databases:
mongos> sh.addShard("shard_A/db1:27011,db2:27011")
{ "shardAdded" : "shard_A", "ok" : 1 }
mongos> sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
shards:
{ "_id" : "shard_A", "host" : "shard_A/db1:27011,db2:27011" }
active mongoses:
"3.2.9" : 1
balancer:
Currently enabled: yes
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
databases:
mongos> sh.addShard("shard_B/db2:27012,db3:27012")
{ "shardAdded" : "shard_B", "ok" : 1 }
mongos> sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
shards:
{ "_id" : "shard_A", "host" : "shard_A/db1:27011,db2:27011" }
{ "_id" : "shard_B", "host" : "shard_B/db2:27012,db3:27012" }
active mongoses:
"3.2.9" : 1
balancer:
Currently enabled: yes
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
databases:
mongos> sh.addShard("shard_C/db3:27013,db1:27013")
{ "shardAdded" : "shard_C", "ok" : 1 }
mongos> sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
shards:
{ "_id" : "shard_A", "host" : "shard_A/db1:27011,db2:27011" }
{ "_id" : "shard_B", "host" : "shard_B/db2:27012,db3:27012" }
{ "_id" : "shard_C", "host" : "shard_C/db1:27013,db3:27013" }
active mongoses:
"3.2.9" : 1
balancer:
Currently enabled: yes
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
databases:
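The same registration can also be done programmatically through mongos with the addShard and listShards admin commands; a sketch with pymongo, equivalent to the sh.addShard() calls above (use one approach or the other):
from pymongo import MongoClient

mongos = MongoClient("core1", 27000)

# Register each replica set as a shard, using the same "setName/host,host" strings.
for seed in ("shard_A/db1:27011,db2:27011",
             "shard_B/db2:27012,db3:27012",
             "shard_C/db3:27013,db1:27013"):
    print(mongos.admin.command("addShard", seed))

# Show what is registered.
print(mongos.admin.command("listShards"))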
Loading data
Load the data by running commands through the mongos on core1.
[root@core1 mongodb]# ./bin/mongo --port 27000
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27000/test
mongos> use logdb
switched to db logdb
mongos> for(var i=1; i<=100000; i++)db.logs.insert({"uid":i, "value":Math.floor(Math.random()*100000+1)})
WriteResult({ "nInserted" : 1 })
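As an alternative to the shell loop above (do not run both, or the data will be doubled), the same test data can be generated from Python; a sketch using pymongo's insert_many through the mongos, which sends the documents in batches instead of 100,000 single inserts:
import random
from pymongo import MongoClient

client = MongoClient("core1", 27000)

# 100,000 documents with a random value, mirroring the shell loop above.
docs = [{"uid": i, "value": random.randint(1, 100000)} for i in range(1, 100001)]
client.logdb.logs.insert_many(docs)
print(client.logdb.logs.count())   # expect 100000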
Create an index and shard the collection
After sharding is enabled, checking the status a little later confirms that the data really is distributed across shard_A, shard_B, and shard_C.
mongos> use logdb
switched to db logdb
mongos> db.logs.ensureIndex({uid:1})
{
"raw" : {
"shard_B/db2:27012,db3:27012" : {
"createdCollectionAutomatically" : false,
"numIndexesBefore" : 1,
"numIndexesAfter" : 2,
"ok" : 1,
"$gleStats" : {
"lastOpTime" : Timestamp(1485741329, 1),
"electionId" : ObjectId("7fffffff0000000000000002")
}
}
},
"ok" : 1
}
mongos> sh.enableSharding("logdb")
{ "ok" : 1 }
mongos> sh.shardCollection("logdb.logs", {uid:1})
{ "collectionsharded" : "logdb.logs", "ok" : 1 }
mongos> sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
shards:
{ "_id" : "shard_A", "host" : "shard_A/db1:27011,db2:27011" }
{ "_id" : "shard_B", "host" : "shard_B/db2:27012,db3:27012" }
{ "_id" : "shard_C", "host" : "shard_C/db1:27013,db3:27013" }
active mongoses:
"3.2.9" : 1
balancer:
Currently enabled: yes
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
6 : Success
databases:
{ "_id" : "logdb", "primary" : "shard_B", "partitioned" : true }
logdb.logs
shard key: { "uid" : 1 }
unique: false
balancing: true
chunks:
shard_A 3
shard_B 4
shard_C 3
{ "uid" : { "$minKey" : 1 } } -->> { "uid" : 10486 } on : shard_A Timestamp(2, 0)
{ "uid" : 10486 } -->> { "uid" : 20972 } on : shard_C Timestamp(3, 0)
{ "uid" : 20972 } -->> { "uid" : 31458 } on : shard_A Timestamp(4, 0)
{ "uid" : 31458 } -->> { "uid" : 41944 } on : shard_C Timestamp(5, 0)
{ "uid" : 41944 } -->> { "uid" : 52430 } on : shard_A Timestamp(6, 0)
{ "uid" : 52430 } -->> { "uid" : 62916 } on : shard_C Timestamp(7, 0)
{ "uid" : 62916 } -->> { "uid" : 73402 } on : shard_B Timestamp(7, 1)
{ "uid" : 73402 } -->> { "uid" : 83888 } on : shard_B Timestamp(1, 7)
{ "uid" : 83888 } -->> { "uid" : 94374 } on : shard_B Timestamp(1, 8)
{ "uid" : 94374 } -->> { "uid" : { "$maxKey" : 1 } } on : shard_B Timestamp(1, 9)
Verify the data on each Primary
[root@db1 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
shard_A:PRIMARY> use logdb
switched to db logdb
shard_A:PRIMARY> db.logs.count()
31457
[root@db2 mongodb]# ./bin/mongo --port 27012
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
shard_B:PRIMARY> use logdb
switched to db logdb
shard_B:PRIMARY> db.logs.count()
37085
[root@db3 mongodb]# ./bin/mongo --port 27013
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
shard_C:PRIMARY> use logdb
switched to db logdb
shard_C:PRIMARY> db.logs.count()
31458
[root@core1 mongodb]# python -c "print(31457 + 37085 + 31458)"
100000
Confirm that the Secondaries are fine as well
[root@db2 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
shard_A:SECONDARY> db.getMongo().setSlaveOk()
shard_A:SECONDARY> use logdb
switched to db logdb
shard_A:SECONDARY> db.logs.count()
31457
[root@db1 mongodb]# ./bin/mongo --port 27013
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
shard_C:SECONDARY> db.getMongo().setSlaveOk()
shard_C:SECONDARY> use logdb
switched to db logdb
shard_C:SECONDARY> db.logs.count()
31458
[root@db3 mongodb]# ./bin/mongo --port 27012
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
shard_B:SECONDARY> db.getMongo().setSlaveOk()
shard_B:SECONDARY> use logdb
switched to db logdb
shard_B:SECONDARY> db.logs.count()
37085
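Instead of connecting to each Secondary directly and calling setSlaveOk(), a client can also read through the replica set with a secondary read preference. A sketch with pymongo against shard_A (note that this only sees shard_A's chunks, not the whole collection):
from pymongo import MongoClient

# Discover the shard_A replica set and route reads to secondaries.
client = MongoClient("db1:27011,db2:27011,db3:27011",
                     replicaSet="shard_A",
                     readPreference="secondary")
print(client.logdb.logs.count())   # expect 31457, shard_A's portion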
Simulate a failure on db2
Kill the mongod processes on db2.
[root@db2 mongodb]# pgrep -lf mongo
2572 ./bin/mongod -f conf/rs_shard_A.conf
2642 ./bin/mongod -f conf/rs_shard_B.conf
2712 ./bin/mongod -f conf/rs_shard_C.conf
[root@db2 mongodb]# pkill -f mongo
Confirm that the cluster ends up in the state described in the overview and keeps running.
With sharding alone (no replication), this failure would put MongoDB out of service, but thanks to the replication + sharding layout, shard_B is still available on db3, so the cluster keeps working.
State of shard_A
[root@db1 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
shard_A:PRIMARY> rs.status()
{
"set" : "shard_A",
"date" : ISODate("2017-01-30T02:29:51.989Z"),
"myState" : 1,
"term" : NumberLong(2),
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "db1:27011",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 7048,
"optime" : {
"ts" : Timestamp(1485741566, 280),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2017-01-30T01:59:26Z"),
"electionTime" : Timestamp(1485736504, 1),
"electionDate" : ISODate("2017-01-30T00:35:04Z"),
"configVersion" : 4,
"self" : true
},
{
"_id" : 1,
"name" : "db2:27011",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-01-30T02:29:50.504Z"),
"lastHeartbeatRecv" : ISODate("2017-01-30T02:23:57.064Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Connection refused",
"configVersion" : -1
},
{
"_id" : 2,
"name" : "db3:27011",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 6867,
"lastHeartbeat" : ISODate("2017-01-30T02:29:51.893Z"),
"lastHeartbeatRecv" : ISODate("2017-01-30T02:29:48.614Z"),
"pingMs" : NumberLong(0),
"configVersion" : 4
}
],
"ok" : 1
}
State of shard_B
You can see that the Primary has moved to db3.
[root@db1 mongodb]# ./bin/mongo --port 27012
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
shard_B:ARBITER> rs.status()
{
"set" : "shard_B",
"date" : ISODate("2017-01-30T02:35:23.035Z"),
"myState" : 7,
"term" : NumberLong(3),
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "db2:27012",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-01-30T02:35:18.606Z"),
"lastHeartbeatRecv" : ISODate("2017-01-30T02:23:57.256Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Connection refused",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "db3:27012",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 7196,
"optime" : {
"ts" : Timestamp(1485743048, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2017-01-30T02:24:08Z"),
"lastHeartbeat" : ISODate("2017-01-30T02:35:18.336Z"),
"lastHeartbeatRecv" : ISODate("2017-01-30T02:35:22.271Z"),
"pingMs" : NumberLong(0),
"electionTime" : Timestamp(1485743047, 1),
"electionDate" : ISODate("2017-01-30T02:24:07Z"),
"configVersion" : 4
},
{
"_id" : 2,
"name" : "db1:27012",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 7377,
"configVersion" : 4,
"self" : true
}
],
"ok" : 1
}
State of shard_C
[root@db1 mongodb]# ./bin/mongo --port 27013
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
shard_C:SECONDARY> rs.status()
{
"set" : "shard_C",
"date" : ISODate("2017-01-30T02:36:48.177Z"),
"myState" : 2,
"term" : NumberLong(3),
"syncingTo" : "db3:27013",
"heartbeatIntervalMillis" : NumberLong(2000),
"members" : [
{
"_id" : 0,
"name" : "db3:27013",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 7279,
"optime" : {
"ts" : Timestamp(1485741598, 78),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2017-01-30T01:59:58Z"),
"lastHeartbeat" : ISODate("2017-01-30T02:36:48.099Z"),
"lastHeartbeatRecv" : ISODate("2017-01-30T02:36:47.471Z"),
"pingMs" : NumberLong(0),
"electionTime" : Timestamp(1485736537, 1),
"electionDate" : ISODate("2017-01-30T00:35:37Z"),
"configVersion" : 4
},
{
"_id" : 1,
"name" : "db1:27013",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 7459,
"optime" : {
"ts" : Timestamp(1485741598, 78),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2017-01-30T01:59:58Z"),
"syncingTo" : "db3:27013",
"configVersion" : 4,
"self" : true
},
{
"_id" : 2,
"name" : "db2:27013",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"lastHeartbeat" : ISODate("2017-01-30T02:36:46.424Z"),
"lastHeartbeatRecv" : ISODate("2017-01-30T02:23:58.153Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Connection refused",
"configVersion" : -1
}
],
"ok" : 1
}
Health check from mongos
For why aggregate is used here in addition to count(), see the documentation for count: on a sharded cluster count() can return an inaccurate result (for example while a chunk migration is in progress), so the $group aggregation serves as a cross-check.
[root@core1 mongodb]# ./bin/mongo --port 27000
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27000/test
mongos> use logdb
switched to db logdb
mongos> db.logs.count()
100000
mongos> db.logs.aggregate([{$group:{_id:null, count:{$sum:1}}}])
{ "_id" : null, "count" : 100000 }
Loading data while the failure persists
Bulk-loading from the mongos shell with a for loop sometimes froze partway through or hit other unexpected problems, so a Python client was used instead.
# Python 3, pymongo 3.4.0
from pymongo import MongoClient
from pprint import pprint

client = MongoClient("core1", 27000)
# print(client.logdb.logs.count())

# Queue 100,000 documents in an ordered bulk operation and execute it in one go.
bulk = client.logdb.logs.initialize_ordered_bulk_op()
for uid in range(100001, 200001):
    bulk.insert({"uid": uid, "value": "hogehoge"})
pprint(bulk.execute())
Running the bulk insert from the Python client as above, during the outage sometimes every document was inserted and sometimes not. The symptom was that the client simply froze; strace shows the following and nothing progresses.
Process 4250 attached
recvfrom(5, ^CProcess 4250 detached
<detached ...>
When the three mongod processes on db2 are restarted and the cluster returns to normal, the freeze clears and the bulk insert resumes, but many documents still fail to get inserted correctly. The root cause has not been identified yet.
Perhaps the only practical countermeasures are to set timeouts on the client side and, when a server goes down, to alert immediately and start recovery right away.
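For example, client-side timeouts and basic error handling could look like the sketch below (the timeout values, uid range, and the choice of an unordered insert are only illustrative):
from pymongo import MongoClient
from pymongo.errors import AutoReconnect, BulkWriteError, ServerSelectionTimeoutError

# Fail fast instead of blocking indefinitely when mongos or a shard is unreachable.
client = MongoClient("core1", 27000,
                     serverSelectionTimeoutMS=5000,   # give up choosing a server after 5s
                     connectTimeoutMS=5000,
                     socketTimeoutMS=10000)

try:
    client.logdb.logs.insert_many(
        [{"uid": uid, "value": "hogehoge"} for uid in range(200001, 200101)],
        ordered=False)
except BulkWriteError as exc:
    # Inspect which writes failed and decide what to retry.
    print(exc.details["writeErrors"])
except (AutoReconnect, ServerSelectionTimeoutError) as exc:
    print("cluster unavailable:", exc)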
Recover db2 from the failure
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf
[root@db2 mongodb]# pgrep -lf mongo
4015 ./bin/mongod -f conf/rs_shard_A.conf
4090 ./bin/mongod -f conf/rs_shard_B.conf
4164 ./bin/mongod -f conf/rs_shard_C.conf
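Once the processes are back, db2 should rejoin shard_A and shard_C as SECONDARY, and in shard_B, where db2 was given priority 2, it should eventually be re-elected PRIMARY after catching up. A quick way to watch this (a sketch with pymongo, connecting directly to the recovered member):
from pymongo import MongoClient

# Check the replica set state as seen from the recovered shard_B member on db2.
client = MongoClient("db2", 27012)
status = client.admin.command("replSetGetStatus")
for m in status["members"]:
    print(m["name"], m["stateStr"])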