Crystal コンパイラを改造してチューニングしたら、生成される LLVM IR が 5 万行ぐらいから 100 行ちょっとになった!! やった!
Hello Crystal DPDK to LLVM IR (100ish lines)
require "../src/dpdk"
# Minimal DPDK "hello world": every launched lcore prints its own id.
#
# The rte_* calls are used exactly as exposed by the Dpdk base class
# (required from ../src/dpdk); their definitions are not visible here.
class Hello < Dpdk
  # Body executed on each lcore: prints the id reported by rte_lcore_id.
  def hello_thread
    printf("Hello from lcore: %d\n", rte_lcore_id)
  end

  # Program body: initialise the EAL with the process arguments, launch
  # hello_thread on the lcores, then wait for the lcores to finish.
  #
  # NOTE(review): hello_thread is passed by name, not as an explicit proc;
  # the wrapping into a callback presumably happens inside Dpdk — confirm.
  def main(argc, argv)
    rte_eal_init(argc, argv)
    rte_eal_mp_remote_launch(hello_thread)
    rte_eal_mp_wait_lcore
  end
end
# Entry point. `run` is not defined in Hello itself, so it presumably
# comes from the Dpdk base class and ends up invoking Hello#main — confirm.
Hello.run
# Run the program:
#   sudo build/hello
Output
% sudo build/hello
EAL: Detected 8 lcore(s)
EAL: Multi-process socket /var/run/.rte_unix
EAL: Probing VFIO support...
EAL: PCI device 0000:00:03.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
EAL: PCI device 0000:00:08.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
EAL: PCI device 0000:00:09.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
EAL: PCI device 0000:00:0a.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
EAL: PCI device 0000:00:10.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
EAL: PCI device 0000:00:11.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
EAL: PCI device 0000:00:12.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
EAL: PCI device 0000:00:13.0 on NUMA socket -1
EAL: Invalid NUMA socket, default to 0
EAL: probe driver: 8086:100e net_e1000_em
Hello from lcore: 1
Hello from lcore: 3
Hello from lcore: 4
Hello from lcore: 2
Hello from lcore: 5
Hello from lcore: 6
Hello from lcore: 7
Hello from lcore: 0
LLVM-IR
; ModuleID = 'main_module'
source_filename = "main_module"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Layout of a Hello instance: a single i32 field.
%Hello = type { i32 }
; Module-level copies of the command-line arguments; both @__crystal_main and
; @main store their %argc/%argv parameters here.
@ARGC_UNSAFE = internal unnamed_addr global i32 0
@ARGV_UNSAFE = internal unnamed_addr global i8** null
; Pointer to the lazily created Hello singleton; null until first use.
@"Hello::instance" = local_unnamed_addr global %Hello* null
; String constants: three i32 header fields followed by the NUL-terminated bytes.
; NOTE(review): the header presumably holds type id, byte size and length —
; confirm against Crystal's String layout.
@"'Error with EAL init...'" = private constant { i32, i32, i32, [31 x i8] } { i32 1, i32 30, i32 30, [31 x i8] c"Error with EAL initialization\0A\00" }
@"'Hello from lcore: %...'" = private constant { i32, i32, i32, [22 x i8] } { i32 1, i32 21, i32 21, [22 x i8] c"Hello from lcore: %d\0A\00" }
; Externally defined thread-local i32; the launched proc loads it to get the
; value it prints as the lcore id.
@per_lcore__lcore_id = external thread_local local_unnamed_addr global i32
; Crystal-level program body: saves argc/argv, makes sure the Hello singleton
; exists, initialises the EAL, launches the hello proc and waits for the
; lcores. Returns void.
define void @__crystal_main(i32 %argc, i8** %argv) local_unnamed_addr {
alloca:
; Stash the process arguments in the module-level globals.
store i32 %argc, i32* @ARGC_UNSAFE, align 4
store i8** %argv, i8*** @ARGV_UNSAFE, align 8
; Lazy singleton: allocate only when Hello::instance is still null.
%0 = load %Hello*, %Hello** @"Hello::instance", align 8
%1 = icmp eq %Hello* %0, null
br i1 %1, label %else.i.i, label %"*Hello::instance:Hello.exit.i"
else.i.i: ; preds = %alloca
; Allocate the 4-byte instance, write 137 into its only i32 field, and publish
; the pointer. The malloc result is used without a null check.
; NOTE(review): 137 is presumably the Crystal type id of Hello — confirm.
%2 = tail call i8* @malloc(i64 4) #0
%3 = bitcast i8* %2 to i32*
store i32 137, i32* %3, align 4
store i8* %2, i8** bitcast (%Hello** @"Hello::instance" to i8**), align 8
br label %"*Hello::instance:Hello.exit.i"
"*Hello::instance:Hello.exit.i": ; preds = %else.i.i, %alloca
; Initialise the EAL; a negative return value takes the error path below.
%4 = tail call i32 @rte_eal_init(i32 %argc, i8** %argv)
%5 = icmp slt i32 %4, 0
br i1 %5, label %then.i.i, label %"*Hello::run:Nil.exit"
then.i.i: ; preds = %"*Hello::instance:Hello.exit.i"
; Error path: call rte_exit with code 1 and a pointer to the first byte of the
; "Error with EAL initialization" text (field 3 of the constant). rte_exit is
; declared noreturn, hence the unreachable.
tail call void (i32, i8*, ...) @rte_exit(i32 1, i8* getelementptr inbounds ({ i32, i32, i32, [31 x i8] }, { i32, i32, i32, [31 x i8] }* @"'Error with EAL init...'", i64 0, i32 3, i64 0))
unreachable
"*Hello::run:Nil.exit": ; preds = %"*Hello::instance:Hello.exit.i"
; Launch the hello proc with a null argument; the launch result %6 is unused.
; NOTE(review): the trailing i32 1 is presumably DPDK's CALL_MASTER (the sample
; output includes lcore 0) — confirm against rte_launch.h.
%6 = tail call i32 @rte_eal_mp_remote_launch(i32 (i8*)* nonnull @"~procProc(Pointer(Void), Int32)@examples.hello.cr:10", i8* null, i32 1)
; Wait for the launched lcores before returning.
tail call void @rte_eal_mp_wait_lcore()
ret void
}
; Function Attrs: nounwind
declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #0
; Signal-fault handler stub: the body is empty, so both %sig and %addr are
; ignored and the function returns immediately.
; Function Attrs: norecurse nounwind readnone uwtable
define void @__crystal_sigfault_handler(i32 %sig, i8* nocapture %addr) local_unnamed_addr #1 {
entry:
ret void
}
; C entry point. Performs the same sequence as @__crystal_main (store
; argc/argv, ensure the Hello singleton, rte_eal_init with error exit, launch,
; wait) and then returns 0.
; NOTE(review): the ".i.i.i.i" label suffixes suggest the Crystal-level main
; was inlined here by the optimiser — confirm.
; Function Attrs: uwtable
define i32 @main(i32 %argc, i8** %argv) local_unnamed_addr #2 {
entry:
; Stash the process arguments in the module-level globals.
store i32 %argc, i32* @ARGC_UNSAFE, align 4
store i8** %argv, i8*** @ARGV_UNSAFE, align 8
; Lazy singleton: allocate only when Hello::instance is still null.
%0 = load %Hello*, %Hello** @"Hello::instance", align 8
%1 = icmp eq %Hello* %0, null
br i1 %1, label %else.i.i.i.i.i, label %"*Hello::instance:Hello.exit.i.i.i.i"
else.i.i.i.i.i: ; preds = %entry
; Allocate the 4-byte instance, write 137 into its i32 field, publish the
; pointer. The malloc result is used without a null check.
%2 = tail call i8* @malloc(i64 4) #0
%3 = bitcast i8* %2 to i32*
store i32 137, i32* %3, align 4
store i8* %2, i8** bitcast (%Hello** @"Hello::instance" to i8**), align 8
br label %"*Hello::instance:Hello.exit.i.i.i.i"
"*Hello::instance:Hello.exit.i.i.i.i": ; preds = %else.i.i.i.i.i, %entry
; Initialise the EAL; a negative return value takes the error path below.
%4 = tail call i32 @rte_eal_init(i32 %argc, i8** %argv)
%5 = icmp slt i32 %4, 0
br i1 %5, label %then.i.i.i.i.i, label %"*Crystal::main<Int32, Pointer(Pointer(UInt8))>:Int32.exit"
then.i.i.i.i.i: ; preds = %"*Hello::instance:Hello.exit.i.i.i.i"
; Error path: rte_exit(1, "Error with EAL initialization\n"); rte_exit is
; declared noreturn, hence the unreachable.
tail call void (i32, i8*, ...) @rte_exit(i32 1, i8* getelementptr inbounds ({ i32, i32, i32, [31 x i8] }, { i32, i32, i32, [31 x i8] }* @"'Error with EAL init...'", i64 0, i32 3, i64 0))
unreachable
"*Crystal::main<Int32, Pointer(Pointer(UInt8))>:Int32.exit": ; preds = %"*Hello::instance:Hello.exit.i.i.i.i"
; Launch the hello proc with a null argument (result %6 unused), then wait for
; the lcores.
%6 = tail call i32 @rte_eal_mp_remote_launch(i32 (i8*)* nonnull @"~procProc(Pointer(Void), Int32)@examples.hello.cr:10", i8* null, i32 1)
tail call void @rte_eal_mp_wait_lcore()
; Exit status 0.
ret i32 0
}
; Function Attrs: nounwind
declare noalias i8* @malloc(i64) local_unnamed_addr #0
declare i32 @rte_eal_init(i32, i8**) local_unnamed_addr
; Function Attrs: noreturn
declare void @rte_exit(i32, i8*, ...) local_unnamed_addr #3
; Callback handed to rte_eal_mp_remote_launch: ensures the Hello singleton
; exists, prints "Hello from lcore: %d\n" with the thread-local lcore id, and
; returns 0. The i8* argument is never read (readnone).
; Function Attrs: nounwind uwtable
define internal i32 @"~procProc(Pointer(Void), Int32)@examples.hello.cr:10"(i8* nocapture readnone %_arg) #4 {
entry:
; Lazy singleton check, same pattern as in @main.
; NOTE(review): no synchronisation is visible around this check-then-store.
; @main stores the instance before launching, so the allocation branch is
; presumably never taken when entered via @main — confirm.
%0 = load %Hello*, %Hello** @"Hello::instance", align 8
%1 = icmp eq %Hello* %0, null
br i1 %1, label %else.i, label %"*Hello::instance:Hello.exit"
else.i: ; preds = %entry
; Allocate the 4-byte instance, write 137 into its i32 field, publish the
; pointer. The malloc result is used without a null check.
%2 = tail call i8* @malloc(i64 4) #0
%3 = bitcast i8* %2 to i32*
store i32 137, i32* %3, align 4
store i8* %2, i8** bitcast (%Hello** @"Hello::instance" to i8**), align 8
br label %"*Hello::instance:Hello.exit"
"*Hello::instance:Hello.exit": ; preds = %entry, %else.i
; Read this thread's lcore id from the thread-local and print it; the format
; pointer addresses the first byte of field 3 of the string constant.
%4 = load i32, i32* @per_lcore__lcore_id, align 4
%5 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ({ i32, i32, i32, [22 x i8] }, { i32, i32, i32, [22 x i8] }* @"'Hello from lcore: %...'", i64 0, i32 3, i64 0), i32 %4) #0
; The printf result %5 is discarded; the callback always returns 0.
ret i32 0
}
declare i32 @rte_eal_mp_remote_launch(i32 (i8*)*, i8*, i32) local_unnamed_addr
declare void @rte_eal_mp_wait_lcore() local_unnamed_addr
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #0
attributes #0 = { nounwind }
attributes #1 = { norecurse nounwind readnone uwtable }
attributes #2 = { uwtable }
attributes #3 = { noreturn }
attributes #4 = { nounwind uwtable }
やったこと
GC、例外処理、Fiber、Thread などをすべて削除し、C と同じレベルまで落とした。