solarisではcrash dumpを取得した後、mdbで解析するのが正攻法だが、
同様の調査をlinuxで行うにはどうすればよいのかを調査した。
直接的には関係ないが、memory空間がどうなっているのか、procfs経由で確認できる。
> cat /proc/iomem
00000000-00000fff : reserved
00001000-0009bfff : System RAM
0009c000-0009ffff : reserved
000a0000-000bffff : PCI Bus 0000:00
000a0000-000bffff : Video RAM area
000c0000-000dffff : PCI Bus 0000:00
000c0000-000cd9ff : Video ROM
000e0000-000fffff : reserved
000f0000-000fffff : System ROM
00100000-1fffffff : System RAM
01000000-0166d16f : Kernel code
0166d170-019c677f : Kernel data
01aab000-01b91fff : Kernel bss
20000000-201fffff : reserved
20200000-3fffffff : System RAM
40000000-401fffff : reserved
40200000-bad88fff : System RAM
bad89000-badd1fff : ACPI Non-volatile Storage
badd2000-badd9fff : ACPI Tables
badda000-badfdfff : reserved
badfe000-badfefff : System RAM
badff000-bae0efff : reserved
bae0f000-bae1cfff : ACPI Non-volatile Storage
bae1d000-bae40fff : reserved
bae41000-bae83fff : ACPI Non-volatile Storage
bae84000-baffffff : System RAM
bb000000-bb7fffff : RAM buffer
bb800000-bf9fffff : reserved
bba00000-bf9fffff : Graphics Stolen Memory
bfa00000-ffffffff : PCI Bus 0000:00
c0000000-cfffffff : 0000:00:02.0
d0000000-d00fffff : PCI Bus 0000:02
d0000000-d0003fff : 0000:02:00.0
d0000000-d0003fff : r8169
d0004000-d0004fff : 0000:02:00.0
d0004000-d0004fff : r8169
e0000000-efffffff : PCI MMCONFIG 0000 [bus 00-ff]
e0000000-efffffff : pnp 00:00
fe000000-fe3fffff : 0000:00:02.0
fe400000-fe403fff : 0000:00:1b.0
fe400000-fe403fff : ICH HD audio
fe404000-fe4040ff : 0000:00:1f.3
fe405000-fe4053ff : 0000:00:1d.0
fe405000-fe4053ff : ehci_hcd
fe406000-fe4063ff : 0000:00:1a.0
fe406000-fe4063ff : ehci_hcd
fe407000-fe40700f : 0000:00:16.0
fe407000-fe40700f : mei_me
fec00000-fec003ff : IOAPIC 0
fed00000-fed003ff : HPET 0
fed08000-fed08fff : pnp 00:07
fed10000-fed19fff : pnp 00:00
fed1c000-fed1ffff : reserved
fed1c000-fed1ffff : pnp 00:07
fed1f410-fed1f414 : iTCO_wdt
fed20000-fed3ffff : pnp 00:00
fed90000-fed93fff : pnp 00:00
fee00000-fee0ffff : pnp 00:00
fee00000-fee00fff : Local APIC
ff000000-ffffffff : reserved
ff000000-ffffffff : pnp 00:07
100000000-13fdfffff : System RAM
13fe00000-13fffffff : RAM buffer
kernelを強制的にpanicさせるコマンドは以下。
> echo c > /proc/sysrq-trigger
これでpanicする仕組みは、以下のコード。
tty driverのinit時にprocfsのsysrq operationを登録する。
/proc/sysrq-triggerに対してwriteがあった場合は、write_sysrq_trigger()が呼び出され、__handle_sysrq()経由で
sysrq_key_table(要素数36の配列)のうち'c'に対応するエントリ(index 12)に登録されたsysrq_crash_opが実行され、panicが発生する。
drivers/tty/sysrq.c
/*
 * Init-time entry point for the sysrq support.
 *
 * The procfs entry is always set up via sysrq_init_procfs(); the handler is
 * registered through sysrq_register_handler() only when sysrq_on() reports
 * a non-zero value. Always returns 0.
 */
static int __init sysrq_init(void)
{
	sysrq_init_procfs();

	if (!sysrq_on())
		return 0;

	sysrq_register_handler();
	return 0;
}
/*
 * Write handler for /proc/sysrq-trigger: writing 'C' here is like sysrq-C.
 *
 * Only the first byte of the user buffer is fetched and passed to
 * __handle_sysrq(); any remaining bytes are not examined, although the
 * full count is reported back as written.
 *
 * Returns count on success (including count == 0, where nothing is done),
 * or -EFAULT when the byte cannot be copied from user space.
 */
static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
				   size_t count, loff_t *ppos)
{
	char key;

	/* Empty write: nothing to dispatch. */
	if (!count)
		return count;

	if (get_user(key, buf))
		return -EFAULT;

	__handle_sysrq(key, false);

	return count;
}
/*
 * File operations installed on the "sysrq-trigger" proc entry: seeks are
 * routed to noop_llseek and writes to write_sysrq_trigger. No other
 * operation is set here.
 */
static const struct file_operations proc_sysrq_trigger_operations = {
	.llseek	= noop_llseek,
	.write	= write_sysrq_trigger,
};
/*
 * Create the "sysrq-trigger" proc entry (mode S_IWUSR, NULL parent) backed
 * by proc_sysrq_trigger_operations. A failed creation is only logged; no
 * error is propagated to the caller.
 */
static void sysrq_init_procfs(void)
{
	if (proc_create("sysrq-trigger", S_IWUSR, NULL,
			&proc_sysrq_trigger_operations))
		return;

	pr_err("Failed to register proc interface\n");
}
取得したpanic dumpはcrash(8)コマンドで覗ける。
例えば、以下のような静的に確保されたkernelのグローバル変数の値を参照できる。
mm/nommu.c
...<snip>
/*
 * File-scope variables quoted from mm/nommu.c. None of them is declared
 * static, so each is an ordinary global with external linkage. The
 * sysctl_* and heap_stack_gap entries carry explicit initial values;
 * the others have no initializer.
 */
void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
unsigned long highest_memmap_pfn;
struct percpu_counter vm_committed_as;
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;
...<snip>
crash> rd sysctl_overcommit_ratio
c066bfe0: 00000032 2...
crash> rd mem_map
c0793690: c9000000 ....
crash> struct page c9000000
struct page {
flags = 1024,
_count = {
counter = 1
},
_mapcount = {
counter = -1
},
{
{
private = 0,
mapping = 0x0
}
},
index = 0,
lru = {
next = 0xc9000018,
prev = 0xc9000018
}
}