そもそも、swpエミュレーションってなんで必要なの?
簡単にまとめると、
(1) もともと、レジスタとメモリの間でデータを交換するSWP命令が存在していた。
(2) マルチコアCPUになったところ、別CPUとの排他制御が必要になった
(3) 上位層にSWP命令使うな、といいたい。だけど、そうも言ってられないので、Kernel側でソフトエミュレーションする仕組みが必要になった。
ですね……。
なお、データ交換するxchg命令は、x86などの別のアーキテクチャでも存在します。
ARM社ドキュメントへのリンク
SWP命令
- http://infocenter.arm.com/help/topic/com.arm.doc.100069_0610_01_en/pge1425914080521.html
- http://infocenter.arm.com/help/topic/com.arm.doc.dui0801cj/pge1425914080521_00004.html
LDREX命令 / STREX命令
- http://infocenter.arm.com/help/topic/com.arm.doc.100069_0610_01_en/pge1425890318276.html
- http://infocenter.arm.com/help/topic/com.arm.doc.dui0801cj/pge1425890318276_00004.html
- http://infocenter.arm.com/help/topic/com.arm.doc.100069_0610_01_en/pge1425890604489.html
- http://infocenter.arm.com/help/topic/com.arm.doc.dui0801cj/pge1425890604489_00003.html
1. 起動時にtraps.cにSWP emulationを登録
swp_emulate.c から traps.c にフック関数の登録依頼
起動時に、 late_initcall(swp_emulation_init); → swp_emulation_init() → register_undef_hook() というコールチェーンになって、swp_handler()がundef_hookとして登録される。
/*
* Only emulate SWP/SWPB executed in ARM state/User mode.
* The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
*
* Undefined-instruction hook descriptor for the SWP/SWPB emulation. A hook
* is selected when (instr & instr_mask) == instr_val and
* (cpsr & cpsr_mask) == cpsr_val; the selected hook's .fn is then called.
*/
static struct undef_hook swp_hook = {
/*
* The mask leaves bits 31:28, bit 22, bits 19:12 and bits 3:0 out of the
* comparison. NOTE(review): presumably the condition field, the byte/word
* selector and the register fields of the SWP{B} encoding - confirm
* against the ARM Architecture Reference Manual.
*/
.instr_mask = 0x0fb00ff0,
.instr_val = 0x01000090,
/* Compare the mode field together with the T and J bits against USR_MODE. */
.cpsr_mask = MODE_MASK | PSR_T_BIT | PSR_J_BIT,
.cpsr_val = USR_MODE,
.fn = swp_handler ★★★★★ココ
};
/*
* Register handler and create status file in /proc/cpu
* Invoked as late_initcall, since not needed before init spawned.
*
* Returns 0 on success, and also when the CPU architecture is older than
* ARMv7 (in which case nothing is registered); returns -ENOMEM when the
* proc entry cannot be created.
*/
static int __init swp_emulation_init(void)
{
/* Pre-ARMv7 CPUs: leave without installing the hook. */
if (cpu_architecture() < CPU_ARCH_ARMv7)
return 0;
# ifdef CONFIG_PROC_FS
/* Create the status file first; on failure the hook is never registered. */
if (!proc_create_single("cpu/swp_emulation", S_IRUGO, NULL,
proc_status_show))
return -ENOMEM;
# endif /* CONFIG_PROC_FS */
pr_notice("Registering SWP/SWPB emulation handler\n");
/* Hand swp_hook over to the undefined-instruction hook list. */
register_undef_hook(&swp_hook); ★★★★★ココ
return 0;
}
/* Run swp_emulation_init() at the late_initcall stage. */
late_initcall(swp_emulation_init); ★★★★★ココ
traps.c は、hook関数を登録する。
arch/arm/kernel/traps.c で登録する。
/*
* register_undef_hook - add @hook to the list of undefined-instruction hooks.
* @hook: descriptor to link in through its ->node member.
*
* The list is modified while holding undef_lock, with the IRQ state saved
* before and restored after the update.
*/
void register_undef_hook(struct undef_hook *hook)
{
unsigned long flags;
raw_spin_lock_irqsave(&undef_lock, flags);
list_add(&hook->node, &undef_hook);
raw_spin_unlock_irqrestore(&undef_lock, flags);
}
2. trap発動(させるまで)
ベクタテーブル -> kernelまで
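雑に説明すると、CPUが未定義命令(undef instruction)例外を発生させるとベクタテーブル経由でカーネルに入り、フック関数として登録しておいたswpのエミュレーション関数が呼ばれる。なお、swp_hookはUSRモードで実行された命令だけを対象にしているので、SWPの場合の入口はテーブル0番の__und_usrになるはず(以下では__und_svcの経路を例として引用している)。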
@ vector_rst: executes swi SYS_ERROR0 on the ARM() line, or svc #0 followed
@ by a nop on the THUMB() lines, and then branches to vector_und.
@ NOTE(review): ARM()/THUMB() presumably select the line for ARM vs Thumb
@ kernel builds - the macros are not shown here; confirm.
vector_rst:
ARM( swi SYS_ERROR0 )
THUMB( svc #0 )
THUMB( nop )
b vector_und
<略>
/*
* Undef instr entry dispatcher
* Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
*
* The table below has 16 entries, annotated 0..f. Per those annotations,
* entry 0 is the USR handler and entry 3 the SVC handler; every other slot
* goes to __und_invalid.
* NOTE(review): presumably vector_stub indexes this table with the mode bits
* of the interrupted context - the macro body is not shown here; confirm.
* NOTE(review): swp_hook only matches USR_MODE, so a trapped SWP{B} would
* presumably be dispatched through entry 0 (__und_usr) rather than entry 3.
*/
vector_stub und, UND_MODE
.long __und_usr @ 0 (USR_26 / USR_32)
.long __und_invalid @ 1 (FIQ_26 / FIQ_32)
.long __und_invalid @ 2 (IRQ_26 / IRQ_32)
.long __und_svc @ 3 (SVC_26 / SVC_32) ★★★★★★★★★★★ ココ!!
.long __und_invalid @ 4
.long __und_invalid @ 5
.long __und_invalid @ 6
.long __und_invalid @ 7
.long __und_invalid @ 8
.long __und_invalid @ 9
.long __und_invalid @ a
.long __und_invalid @ b
.long __und_invalid @ c
.long __und_invalid @ d
.long __und_invalid @ e
.long __und_invalid @ f
.align 5
<略>
.align 5
@
@ __und_svc - undefined-instruction entry listed as slot 3 (SVC) of the und
@ dispatch table.
@
@ Loads the trapping opcode into r0, gives call_fpe a chance to emulate it,
@ and otherwise hands the saved register frame to __und_fault with the PC
@ correction in r1 (2 on the 16-bit Thumb path, 4 otherwise).
@ NOTE(review): r4 is used as the exception-time PC; presumably it is set up
@ by svc_entry, which is not shown here - confirm.
@
__und_svc:
# ifdef CONFIG_KPROBES
@ If a kprobe is about to simulate a "stmdb sp..." instruction,
@ it obviously needs free stack space which then will belong to
@ the saved context.
svc_entry MAX_STACK_SIZE
# else
svc_entry
# endif
@
@ call emulation code, which returns using r9 if it has emulated
@ the instruction, or the more conventional lr if we are to treat
@ this as a real undefined instruction
@
@ r0 - instruction
@
# ifndef CONFIG_THUMB2_KERNEL
ldr r0, [r4, #-4]
# else
@ r1 = PC correction used if we branch straight to __und_svc_fault
mov r1, #2
ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2
cmp r0, #0xe800 @ 32-bit instruction if xx >= 0
blo __und_svc_fault
ldrh r9, [r4] @ bottom 16 bits
add r4, r4, #2
str r4, [sp, #S_PC]
orr r0, r9, r0, lsl #16
# endif
@ r9 = where call_fpe returns to when the instruction was emulated
badr r9, __und_svc_finish
mov r2, r4
bl call_fpe
mov r1, #4 @ PC correction to apply
__und_svc_fault:
mov r0, sp @ struct pt_regs *regs
bl __und_fault ★★★★★★★★★★★ ココ!!
__und_svc_finish:
get_thread_info tsk
ldr r5, [sp, #S_PSR] @ Get SVC cpsr
svc_exit r5 @ return from exception
UNWIND(.fnend )
ENDPROC(__und_svc)
<略>
@
@ __und_fault - common tail for undefined-instruction faults.
@ In: r0 = struct pt_regs *, r1 = PC correction in bytes (the caller shown
@ in __und_svc passes 2 or 4).
@ Rewinds the saved PC by r1 and then branches to do_undefinstr.
@
__und_fault:
@ Correct the PC such that it is pointing at the instruction
@ which caused the fault. If the faulting instruction was ARM
@ the PC will be pointing at the next instruction, and have to
@ subtract 4. Otherwise, it is Thumb, and the PC will be
@ pointing at the second half of the Thumb instruction. We
@ have to subtract 2.
ldr r2, [r0, #S_PC]
sub r2, r2, r1
str r2, [r0, #S_PC]
b do_undefinstr ★★★★★★★★★★★★★★★★★ ココ!!
ENDPROC(__und_fault)
kernel → hook関数
さて、これで無事に do_undefinstrが呼び出される。
ここでは、すでに事前登録されたhook関数を呼び出す関数を呼び出す(めんどい言い方ですが…)
/*
* do_undefinstr - C-level handler for an undefined-instruction exception.
* @regs: register state saved at exception entry.
*
* Fetches the opcode at the faulting PC - read directly in SVC mode, through
* get_user() as one or two halfwords in Thumb mode, or as a single word
* otherwise - and offers it to the registered hooks via call_undef_hook().
* When call_undef_hook() returns 0 the instruction counts as handled and the
* function returns. A failed user-space read jumps to die_sig, which lies in
* the part of the function elided below.
*/
asmlinkage void do_undefinstr(struct pt_regs *regs)
{
unsigned int instr;
void __user *pc;
pc = (void __user *)instruction_pointer(regs);
if (processor_mode(regs) == SVC_MODE) {
<略>
instr = __mem_to_opcode_arm(*(u32 *) pc);
} else if (thumb_mode(regs)) {
if (get_user(instr, (u16 __user *)pc))
goto die_sig;
instr = __mem_to_opcode_thumb16(instr);
/* Wide Thumb encoding: fetch the second halfword and merge the two. */
if (is_wide_instruction(instr)) {
unsigned int instr2;
if (get_user(instr2, (u16 __user *)pc+1))
goto die_sig;
instr2 = __mem_to_opcode_thumb16(instr2);
instr = __opcode_thumb32_compose(instr, instr2);
}
} else {
if (get_user(instr, (u32 __user *)pc))
goto die_sig;
instr = __mem_to_opcode_arm(instr);
}
/* 0 from the hook dispatcher means the instruction was dealt with. */
if (call_undef_hook(regs, instr) == 0) ★★★★★★★ココ
return;
<略>
}
NOKPROBE_SYMBOL(do_undefinstr)
call_undef_hook() から、hookに登録したある関数が呼び出される。
/*
* call_undef_hook - dispatch an undefined instruction to a registered hook.
* @regs: register state at the time of the exception.
* @instr: the opcode that trapped.
*
* A hook matches when both its instruction mask/value pair and its CPSR
* mask/value pair agree with @instr and regs->ARM_cpsr.
*
* Returns the matching hook's return value, or 1 when no hook matches.
*/
static nokprobe_inline
int call_undef_hook(struct pt_regs *regs, unsigned int instr)
{
struct undef_hook *hook;
unsigned long flags;
int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL;
/*
* Walk the list under undef_lock and remember only the function pointer.
* The loop has no break, so if several hooks match, the last one visited
* is the one that gets called.
*/
raw_spin_lock_irqsave(&undef_lock, flags);
list_for_each_entry(hook, &undef_hook, node)
if ((instr & hook->instr_mask) == hook->instr_val &&
(regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val)
fn = hook->fn;
raw_spin_unlock_irqrestore(&undef_lock, flags);
/* The hook itself runs after the lock has been dropped. */
return fn ? fn(regs, instr) : 1;
}
3. trap発動!!
さて、それではフック関数が呼ばれたら、に入ります。
/*
* swp_handler logs the id of calling process, dissects the instruction, sanity
* checks the memory location, calls emulate_swpX for the actual operation and
* deals with fixup/error handling before returning
*
* @regs: saved register state of the trapping context.
* @instr: the trapped opcode.
*
* Returns 0 once the condition check has passed or failed normally (whether
* the swap succeeded, a segfault was set up, or the instruction was skipped),
* -EFAULT for an unconditional encoding, and -EINVAL for an unexpected
* result from arm_check_condition().
*/
static int swp_handler(struct pt_regs *regs, unsigned int instr)
{
unsigned int address, destreg, data, type;
unsigned int res = 0;
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
/* Decide from the condition code whether the instruction should run. */
res = arm_check_condition(instr, regs->ARM_cpsr);
switch (res) {
case ARM_OPCODE_CONDTEST_PASS:
break;
case ARM_OPCODE_CONDTEST_FAIL:
/* Condition failed - return to next instruction */
regs->ARM_pc += 4;
return 0;
case ARM_OPCODE_CONDTEST_UNCOND:
/* If unconditional encoding - not a SWP, undef */
return -EFAULT;
default:
return -EINVAL;
}
/* Emit the debug message only when the pid differs from the last one seen. */
if (current->pid != previous_pid) {
pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
current->comm, (unsigned long)current->pid);
previous_pid = current->pid;
}
/* Pull the operands out of the saved registers named by the opcode. */
address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)];
data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
type = instr & TYPE_SWPB;
pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
EXTRACT_REG_NUM(instr, RN_OFFSET), address,
destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);
/* Check access in reasonable access range for both SWP and SWPB */
if (!access_ok((address & ~3), 4)) {
pr_debug("SWP{B} emulation: access to %p not allowed!\n",
(void *)address);
res = -EFAULT;
} else {
res = emulate_swpX(address, &data, type);
}
if (res == 0) {
/*
* On successful emulation, revert the adjustment to the PC
* made in kernel/traps.c in order to resume execution at the
* instruction following the SWP{B}.
*/
regs->ARM_pc += 4;
regs->uregs[destreg] = data;
} else if (res == -EFAULT) {
/*
* Memory errors do not mean emulation failed.
* Set up signal info to return SEGV, then return OK
*/
set_segfault(regs, address);
}
/*
* Any other result (emulate_swpX() can hand back -EAGAIN when a signal is
* pending) leaves the PC and registers untouched.
* NOTE(review): presumably the SWP{B} then traps again once the signal has
* been handled - confirm.
*/
return 0;
}
3.1 arm_check_condition
現状の状態で、当該命令を実行するべきかどうかの判断。
- ARM_OPCODE_CONDTEST_FAIL - 実行する必要なし。
- ARM_OPCODE_CONDTEST_PASS - 実行しなければならない。
- ARM_OPCODE_CONDTEST_UNCOND - 条件コードがNV(Never)、あるいはv5以降の無条件命令用オペコード空間に属する場合。swp_handler()ではSWPではないとみなして-EFAULTを返す。
/*
* Returns:
* ARM_OPCODE_CONDTEST_FAIL - if condition fails
* ARM_OPCODE_CONDTEST_PASS - if condition passes (including AL)
* ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional
* opcode space from v5 onwards
*
* Code that tests whether a conditional instruction would pass its condition
* check should check that return value == ARM_OPCODE_CONDTEST_PASS.
*
* Code that tests if a condition means that the instruction would be executed
* (regardless of conditional or unconditional) should instead check that the
* return value != ARM_OPCODE_CONDTEST_FAIL.
*/
3.2 emulate_swpX()
/*
* emulate_swpX - perform the swap for SWP (word) or SWPB (byte).
* @address: user address whose contents are exchanged.
* @data: in: value to store; out: previous memory contents (only updated
*        when the swap succeeds).
* @type: TYPE_SWPB selects the byte variant, any other value the word one.
*
* Returns 0 on success, -EFAULT for an unaligned word swap or a faulting
* access, and -EAGAIN when the exclusive store failed and a signal is
* pending.
*/
static int emulate_swpX(unsigned int address, unsigned int *data,
unsigned int type)
{
unsigned int res = 0;
/* A word-sized SWP needs a 4-byte aligned address; SWPB is exempt. */
if ((type != TYPE_SWPB) && (address & 0x3)) {
/* SWP to unaligned address not permitted */
pr_debug("SWP instruction on unaligned pointer!\n");
return -EFAULT;
}
/*
* Run __user_swp[b]_asm() and repeat while it reports -EAGAIN, calling
* cond_resched() between attempts; give up early if a signal is pending.
*/
while (1) {
unsigned long temp;
unsigned int __ua_flags;
__ua_flags = uaccess_save_and_enable();
if (type == TYPE_SWPB)
__user_swpb_asm(*data, address, res, temp);
else
__user_swp_asm(*data, address, res, temp);
uaccess_restore(__ua_flags);
if (likely(res != -EAGAIN) || signal_pending(current))
break;
cond_resched();
}
/* Count successful emulations per instruction type. */
if (res == 0) {
if (type == TYPE_SWPB)
swpbcounter++;
else
swpcounter++;
}
return res;
}
3.3 __user_swp[b]_asm()
/*
* Error-checking SWP macros implemented using ldrex{b}/strex{b}
*
* __user_swpX_asm(data, addr, res, temp, B): B is the mnemonic suffix, ""
* for the word form and "b" for the byte form.
*
* Operands: %0 = res, %1 = data, %2 = temp, %3 = addr, %4 = -EAGAIN,
* %5 = -EFAULT.
*
* Label 0 loads the current memory value into temp and label 1 tries to
* store data. If strex leaves 0 in res, the old value is moved into data;
* otherwise res becomes -EAGAIN. The __ex_table entries pair labels 0 and 1
* with the fixup at label 3, which sets res to -EFAULT and branches back to
* label 2 at the end of the sequence.
* NOTE(review): presumably the fault handler consults __ex_table to reach the
* fixup - that machinery is outside this snippet; confirm.
*/
# define __user_swpX_asm(data, addr, res, temp, B) \
__asm__ __volatile__( \
"0: ldrex"B" %2, [%3]\n" \
"1: strex"B" %0, %1, [%3]\n" \
" cmp %0, #0\n" \
" moveq %1, %2\n" \
" movne %0, %4\n" \
"2:\n" \
" .section .text.fixup,\"ax\"\n" \
" .align 2\n" \
"3: mov %0, %5\n" \
" b 2b\n" \
" .previous\n" \
" .section __ex_table,\"a\"\n" \
" .align 3\n" \
" .long 0b, 3b\n" \
" .long 1b, 3b\n" \
" .previous" \
: "=&r" (res), "+r" (data), "=&r" (temp) \
: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \
: "cc", "memory")
/* Word-sized swap: plain ldrex/strex. */
# define __user_swp_asm(data, addr, res, temp) \
__user_swpX_asm(data, addr, res, temp, "")
/* Byte-sized swap: ldrexb/strexb. */
# define __user_swpb_asm(data, addr, res, temp) \
__user_swpX_asm(data, addr, res, temp, "b")
読みやすく書き直してしまうと
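ldrexとstrexでデータ交換しているだけになる。後半の .text.fixup / __ex_table の部分は例外テーブルの登録で、ldrex(ラベル0)またはstrex(ラベル1)でフォルトが起きた場合にラベル3へ飛び、resに-EFAULTを設定してからラベル2(シーケンスの末尾)へ戻る。
strexが失敗して-EAGAINが返った場合は、上位層のemulate_swpX()で成功するまで(シグナル保留時を除き)リトライされる。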
0: ldrex"B" %temp, [addr] # [addr]の内容を、tempに読み出す
1: strex"B" %res, %data, [addr] # dataの内容を、[addr]に書き出す
# resはアップデートされたら0, アップデートできなければ1
cmp %res, #0 # resが0であるかを判断
moveq %data, %temp # true => tempの内容をdataに書き出す
movne %res, -EAGAIN # false => res = -EAGAIN
2:
.section .text.fixup,"ax"
.align 2
3: mov %res, -EFAULT
b 2b
.previous
.section __ex_table,"a"
.align 3
.long 0b, 3b
.long 1b, 3b
.previous
: "=&r" (res), "+r" (data), "=&r" (temp)
: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)
: "cc", "memory"
)
以上です。