search
LoginSignup
0

More than 1 year has passed since last update.

posted at

updated at

[systemd] rc.localでNvidiaのGPUのファン制御をする

ながれ

  1. rc.localを有効にする
  2. GPUファン制御用のシェルスクリプトを作成する
  3. rc.localにスクリプトの実行パスを記述する

1. rc.localを有効にする

# touch /etc/rc.local
# chmod 755 /etc/rc.local
# vi /etc/systemd/system/rc-local.service

[Unit] Description=/etc/rc.local Compatibility
ConditionPathExists=/etc/rc.local

[Service] Type=forking
ExecStart=/etc/rc.local start
TimeoutSec=0
StandardOutput=tty
RemainAfterExit=yes
SysVStartPriority=99

[Install] WantedBy=multi-user.target
# systemctl enable rc-local
テスト
# vi /etc/rc.local
#!/bin/bash
echo "TEST OK" > /tmp/rc.local.status
再起動して確認
# reboot
# cat /tmp/rc.local.status

2. GPUファン制御用のシェルスクリプトを作成する

/opt/fancontrol/fancntrol.sh
#!/bin/sh

prf() { printf %s\\n "$*" ; }
z=$0; display=""; CDPATH=""; fname=""; num_gpus="0"; num_fans="0"; debug="0"
max_t="0"; max_t2="0"; mnt="0"; mxt="0"; ot="0"; tdiff="0"; cur_t="0"
new_spd="0"; cur_spd="0"; old_t="200"; check_diff1="0"; check_diff2="0"
fcurve_len="0"; fcurve_len2="0"; num_gpus_loop="0"; num_fans_loop="0"; old_s="0"
otl="-1"; sleep_override=""; gpu_cmd="nvidia-settings"

usage="Usage: $(basename "$0") [OPTION]...

where:
-c  [ARG] configuration file (default: $PWD/config)
-d  [ARG] display device string (e.g. \":0\", \"CRT-0\"), defaults to auto
-D  run in daemon mode (background process), using sh
-h  show this help text
-l  enable logging to stdout
-s  [ARG] set the sleep time (in seconds)
-v  show the current version of this script"

{ \unalias command; \unset -f command; } >/dev/null 2>&1
[ -n "$ZSH_VERSION" ] && options[POSIX_BUILTINS]=on
while true; do
    [ -L "$z" ] || [ -e "$z" ] || { prf "'$z' is invalid" >&2; exit 1; }
    command cd "$(command dirname -- "$z")"
    fname=$(command basename -- "$z"); [ "$fname" = '/' ] && fname=''
    if [ -L "$fname" ]; then
        z=$(command ls -l "$fname"); z=${z#* -> }; continue
    fi; break
done; conf_file=$(command pwd -P)
if [ "$fname" = '.' ]; then
    conf_file=${conf_file%/}
elif [ "$fname" = '..' ]; then
    conf_file=$(command dirname -- "${conf_file}")
else
    conf_file=${conf_file%/}/$fname
fi
conf_file=$(dirname -- "$conf_file")"/config"

while getopts ":c: :d: :D :h :l :s: :v :x" opt; do
    if [ "$opt" = "c" ]; then conf_file="$OPTARG"
    elif [ "$opt" = "d" ]; then display="-c $OPTARG"
    elif [ "$opt" = "D" ]; then nohup sh temp.sh >/dev/null 2>&1 &
        exit 1
    elif [ "$opt" = "h" ]; then prf "$usage"; exit 0
    elif [ "$opt" = "l" ]; then debug="1"
    elif [ "$opt" = "s" ]; then sleep_override="$OPTARG"
    elif [ "$opt" = "v" ]; then prf "Version 18"; exit 0
    elif [ "$opt" = "x" ]; then gpu_cmd="../nssim/nssim nvidia-settings"
    elif [ "$opt" = ":" ]; then prf "Option -$OPTARG requires an argument"
    else prf "Invalid option: -$OPTARG"; exit 1
    fi
done

prf "
################################################################################
#          nan0s7's script for automatically managing GPU fan speed            #
################################################################################
"
# FUNCTIONS THAT REQUIRE CERTAIN DEPENDENCIES TO BE MET
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# DEPENDS: PROCPS
kill_already_running() {
    tmp="$(pgrep -c temp.sh)"
    if [ "$tmp" -gt "1" ]; then
        process_pid="$(pgrep -o temp.sh)"
        kill "$process_pid"; prf "Killed $process_pid"
    fi
}
# DEPENDS: NVIDIA-SETTINGS
get_temp() {
    cur_t="$($gpu_cmd -q=[gpu:"$gpu"]/GPUCoreTemp -t $display)"
}
get_query() {
    prf "$($gpu_cmd -q "$1" $display)"
}
set_fan_control() {
    i=0
    while [ "$i" -le "$1" ]; do
        $gpu_cmd -a [gpu:"$i"]/GPUFanControlState="$2" $display
        i=$((i+1))
    done
}
set_speed() {
    $gpu_cmd -a [fan:"$fan"]/GPUTargetFanSpeed="$cur_spd" $display
}
finish() {
    set_fan_control "$num_gpus_loop" "0"
    prf "Fan control set back to auto mode"; exit 0
}; trap " finish" INT
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
echo_info() {
    e=" t=$cur_t ot=$ot td=$tdiff s=$sleep_time gpu=$gpu fan=$fan cd=$chd"
    e="$e nsp=$new_spd osp=$cur_spd maxt=$mxt mint=$mnt otl=$otl"
    prf "$e"
}
arr_size() {
    arr_len=0
    for element in $arr; do
        arr_len=$((arr_len+1))
    done
}
re_elem() {
    i=0
    elem=0
    for elem in $arr; do
        if [ "$i" -ne "$n" ]; then
            i=$((i+1))
        else
            break
        fi
    done
}
loop_cmds() {
    get_temp
    if [ "$cur_t" -ne "$ot" ]; then
        # Calculate difference and make sure it's positive
        if [ "$cur_t" -le "$ot" ]; then
            tdiff="$((ot-cur_t))"
        else
            tdiff="$((cur_t-ot))"
        fi
        if [ "$tdiff" -ge "$chd" ]; then
            if [ "$cur_t" -lt "$mnt" ]; then
                new_spd="0"; otl="-1"
            elif [ "$cur_t" -lt "$mxt" ]; then
                tl=0
                for arr_t in $tc; do
                    if [ "$cur_t" -le "$arr_t" ]; then
                        break
                    else
                        tl=$((tl+1))
                    fi
                done
                if [ "$tl" -ne "$otl" ]; then
                    arr="$fc"; n="$tl"; re_elem
                    new_spd="$elem"; otl="$tl"
                fi
            else
                new_spd="100"
            fi
            if [ "$new_spd" -ne "$cur_spd" ]; then
                cur_spd="$new_spd"
                set_speed
                i=0
                tmp="$old_s"; old_s=""
                for elem in $tmp; do
                    if [ "$i" -ne "$fan" ]; then
                        old_s="$old_s $elem"
                    else
                        old_s="$old_s $cur_spd"
                    fi
                    i=$((i+1))
                done
            fi
            i=0
            tmp="$old_t"; old_t=""
            for elem in $tmp; do
                if [ "$i" -ne "$fan" ]; then
                    old_t="$old_t $elem"
                else
                    old_t="$old_t $cur_t"
                fi
                i=$((i+1))
            done
            tdiff="0"
        fi
    fi
    if [ "$debug" -eq "1" ]; then
        echo_info
    fi
}
set_stuff() {
    arr="$fan2gpu"; n="$fan"; re_elem; gpu="$elem"
    arr="$which_curve"; n="$fan"; re_elem; tmp="$elem"
    if [ "$tmp" -eq "1" ]; then
        chd="$check_diff1"
        mnt="$min_t"; mxt="$max_t"
        tc="$tcurve"; fc="$fcurve"
    else
        chd="$check_diff2"
        mnt="$min_t2"; mxt="$max_t2"
        tc="$tcurve2"; fc="$fcurve2"
    fi
}

kill_already_running

# Load the config file
if ! [ -f "$conf_file" ]; then
    prf "Config file not found." >&2; exit 1
fi
. "$conf_file"; prf "Configuration file: $conf_file"

if [ -n "$sleep_override" ]; then sleep_time="$sleep_override"; fi

# Check for any user errors in config file
arr="$fcurve"; arr_size; size1="$arr_len"
arr="$tcurve"; arr_size; size2="$arr_len"
if ! [ "$size1" -eq "$size2" ]; then
    prf "fcurve and tcurve don't match up!"; exit 1
fi
arr="$fcurve2"; arr_size; size1="$arr_len"
arr="$tcurve2"; arr_size; size2="$arr_len"
if ! [ "$size1" -eq "$size2" ]; then
    prf "fcurve2 and tcurve2 don't match up!"; exit 1
fi
arr="$tcurve"; n="0"; re_elem
if [ "$min_t" -ge "$elem" ]; then
    prf "min_t is greater than the first value in the tcurve!"; exit 1
fi
arr="$tcurve2"; n="0"; re_elem
if [ "$min_t2" -ge "$elem" ]; then
    prf "min_t2 is greater than the first value in the tcurve2!"; exit 1
fi

# Calculate some more values
arr="$tcurve"; arr_size; arr="$tcurve"; n="$arr_len"; re_elem; max_t="$elem"
arr="$tcurve2"; arr_size; arr="$tcurve2"; n="$arr_len"; re_elem; max_t2="$elem"
arr="$fcurve"; arr_size; fcurve_len="$((arr_len-1))"
arr="$fcurve2"; arr_size; fcurve_len2="$((arr_len-1))"

# Get the system's GPU configuration
num_fans=$(get_query "fans"); num_fans="${num_fans%* Fan on*}"
if [ -z "$num_fans" ]; then
    prf "No Fans detected"; exit 1
elif [ "${#num_fans}" -gt "2" ]; then
    num_fans="${num_fans%* Fans on*}"
    num_fans_loop="$((num_fans-1))"
fi
prf "Number of Fans detected: $num_fans"
num_gpus=$(get_query "gpus"); num_gpus="${num_gpus%* GPU on*}"
if [ -z "$num_gpus" ]; then
    prf "No GPUs detected"; exit 1
elif [ "${#num_gpus}" -gt "2" ]; then
    num_gpus="${num_gpus%* GPUs on*}"
    num_gpus_loop="$((num_gpus-1))"
fi
prf "Number of GPUs detected: $num_gpus"

i=0
while [ "$i" -lt "$num_fans_loop" ]; do
    old_t="$old_t 0"
    old_s="$old_s 0"
    i=$((i+1))
done

if [ "$force_check" -eq "0" ]; then
    j=0
    while [ "$j" -le "$((fcurve_len-1))" ]; do
        arr="$tcurve"; n="$((j+1))"; re_elem; tmp1="$elem"
        arr="$tcurve"; n="$j"; re_elem; tmp2="$elem"
        check_diff1="$((check_diff1+tmp1-tmp2))"
        j=$((j+1))
    done
    check_diff1="$(((check_diff1/(fcurve_len-1))-sleep_time))"
    j=0
    while [ "$j" -le "$((fcurve_len2-1))" ]; do
        arr="$tcurve2"; n="$((j+1))"; re_elem; tmp1="$elem"
        arr="$tcurve2"; n="$j"; re_elem; tmp2="$elem"
        check_diff2="$((check_diff2+tmp1-tmp2))"
        j=$((j+1))
    done
    check_diff2="$(((check_diff2/(fcurve_len2-1))-sleep_time))"
else
    check_diff1="$force_check"; check_diff2="$force_check"
fi

set_fan_control "$num_gpus_loop" "1"

if [ "$num_gpus" -eq "1" ] && [ "$num_fans" -eq "1" ]; then
    prf "Started process for 1 GPU and 1 Fan"
    fan="$default_fan"
    set_stuff
    while true; do
        arr="$old_t"; n="$fan"; re_elem; ot="$elem"
        arr="$old_s"; n="$fan"; re_elem; cur_spd="$elem"
        loop_cmds
        sleep "$sleep_time"
    done
else
    prf "Started process for n-GPUs and n-Fans"
    while true; do
        fan=0
        while [ "$fan" -le "$num_fans_loop" ]; do
            set_stuff
            arr="$old_t"; n="$fan"; re_elem; ot="$elem"
            arr="$old_s"; n="$fan"; re_elem; cur_spd="$elem"
            loop_cmds
            fan=$((fan+1))
        done
        sleep "$sleep_time"
    done
fi
/opt/fancontrol/config
# min_t is the temperature at which every temperature below it will cause
#  the fan speed to be set to 0%, and everything above will be whatever the
#  first speed in fcurve is (default of 25%)
# min_t2 is only used with the second fan speed and temperature arrays, so
#  there is no need to change it unless you're using the second curve
min_t="25"
min_t2="25"

# How many seconds the script should wait until checking for a change in temps
sleep_time="7"

# By default it's set up so that when the temp is less than or equal to 35
#  degrees, the fan speed will be set to 25%. Next, if the temp is between 36
#  and 45, the fan speed should be set to 40%, etc.
# The last temperature value will be the maximum temperature before 100% fan
#  speed will be set
# You can make the array as big or as small as you require, as long as they
#  both end up being the same size
fcurve="25 40 55 70 85" # fan speeds
tcurve="35 45 55 65 75" # temperatures

# This value is used to determine the temperature difference needed to get
#  the script to check for a new speed to apply. The default of this value
#  is zero, which means the script will automatically calculate a value
#  based on the temperature curves supplied below
force_check="0"

# These two arrays are for GPU's that have a secondary fan that you may wish
#  to control seperately, especially if it is water-cooled.
fcurve2="15 30 45 60 75"
tcurve2="35 45 55 65 75"

# First number in array is fan 0, second number is fan 1, etc. If the number
#  is 1, that indicates that the script should use the first curve for that
#  fan. The same goes for the number 2.
which_curve="1 2 1 2"

# Only used for single-fan operation. If you have more than one gpu/fan but
#  only want to control one of them, select which one here. Otherwise there
#  is no need to change this setting.
default_fan="0"

# Similar to which_curve, but instead lets the script know which of the GPU's
#  has which fan. i.e. element 0 in the array being set to 0 means that fan 0
#  is assigned to GPU 0, element 1 is 0 too, meaning fan 1 is on GPU 0 as well
fan2gpu="0 0 1 1"

3. rc.localにスクリプトの実行パスを記述する

/etc/rc.local
bash /opt/fancontrol/fancontrol.sh
exit 0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
What you can do with signing up
0