Here is my modified fan script.
Code:
#!/usr/local/bin/bash
# spinpid.sh for Dell R720 with single fan zone
# Drives the fan duty cycle from drive temperatures (PID) and CPU temperature.
VERSION="2018-06-23"
# Run as superuser. See notes at end.
##############################################
#
# Settings
#
##############################################
# Host-specific values are sourced from host.config. The rest of this script
# reads IPMITOOL, DUTY_MIN, CPU_REF and CPU_SCALE but never assigns them, so
# they are expected to come from that file.
. /mnt/Storage/scripts/host.config
# Full path to 'smartctl' program:
smartctl=/usr/local/sbin/smartctl
# Creates logfile and sends all stdout and stderr to the log, as well as to the console.
# tee is run without '-a', so an existing log is overwritten on each start;
# if you want to append to existing log, add '-a' to the tee command.
# Change to your desired log location/name:
LOG=/mnt/Storage/scripts/logs/spinpid.log
exec > >(tee -i $LOG) 2>&1
#DUTY_MIN=10 # Fan minimum duty cycle (%) (to avoid stalling) - moved to host.config
################# DRIVE SETTINGS ################
SP=33.57 # Setpoint mean drive temperature (C)
# Time interval for checking drives (minutes). Drives change
# temperature slowly; 5 minutes is probably frequent enough.
DRIVE_T=5
# PID gains used by DRIVES_check_adjust. With Ki=0 the integral term is always 0.
Kp=4 # Proportional tunable constant (for drives)
Ki=0 # Integral tunable constant (for drives)
Kd=40 # Derivative tunable constant (for drives)
################# CPU SETTINGS ################
# Time interval for checking CPU (seconds).
# 1 to 12 may be appropriate. Set to 20 for r720xd since there is lag
CPU_T=20
# Reference temperature (C) for scaling CPU_DUTY (NOT a setpoint).
# At and below this temperature, CPU will demand minimum
# fan speed (DUTY_MIN, set in host.config).
#CPU_REF=70 # Integer only! - moved to host.config
# Scalar for scaling CPU_DUTY.
# CPU will demand this number of percentage points in additional
# duty cycle for each degree of temperature above CPU_REF.
#CPU_SCALE=4 # Integer only! - moved to host.config
#############################
# function get_smart_drives
# Print the device nodes of every drive whose 'smartctl -i' report
# contains the line "SMART support is: Enabled".
# Globals:   smartctl (read) - path of the smartctl program
# Arguments: none
# Outputs:   one line on stdout; every device is preceded by a single
#            space (e.g. " /dev/da0 /dev/da1"); an empty line if no
#            drive qualifies
#############################
function get_smart_drives {
  local enabled="" dev
  # The first column of each 'smartctl --scan' line is the device node;
  # the rest of the line is discarded into '_'.
  while read -r dev _; do
    [[ -n "$dev" ]] || continue
    # stdin is redirected so the probe cannot consume the scan list
    # that feeds this loop.
    if "$smartctl" -i "$dev" < /dev/null | grep -q "SMART support is: Enabled"; then
      enabled="$enabled $dev"
    fi
  done < <("$smartctl" --scan)
  echo "$enabled"
}
############################################################
# function print_header
# Write a blank line, today's date (weekday, month, day) and the row
# of column titles for the log. No newline follows the titles.
# Called when script starts and each quarter day.
# Globals: DATE (written)
############################################################
function print_header {
  DATE=$(date +"%A, %b %d")
  # Blank line, the date line, then the one-space indent of the title row.
  printf '\n%s \n ' "$DATE"
  printf '%4s %5s %5s %6s %5s %6s %3s %s %4s %4s %s' \
    "Tmax" "Tmean" "ERRc" "P" "I" "D" "CPU" "Driver" "Prev/New" "RPM" "Adjustments"
}
#################################################
# function read_fan_data
# Query the sensor data repository once and store the reported speed
# of each of the six fans.
# Globals: IPMITOOL (read; deliberately expanded unquoted so the options
#          embedded in it become separate words),
#          SDR, RPM1..RPM6 (written)
#################################################
function read_fan_data {
  local fan
  SDR=$($IPMITOOL sdr elist all)
  # From the line(s) naming each fan, keep every run of 3 to 5
  # consecutive digits.
  for fan in 1 2 3 4 5 6; do
    printf -v "RPM${fan}" '%s' \
      "$(grep "Fan${fan}" <<< "$SDR" | grep -Eo '[0-9]{3,5}')"
  done
}
##############################################
# function CPU_check_adjust
# Read the sensor data repository once, record the hottest CPU
# temperature and the fan speeds, and derive DUTY_CPU from the
# temperature. On the first call (FIRST_TIME=1) stop there.
# Otherwise pass the larger of DUTY_DRIVE and DUTY_CPU to adjust_fans
# and print the resulting duty if it changed.
# Globals read:    IPMITOOL, CORES, CPU_REF, CPU_SCALE, DUTY_MIN,
#                  FIRST_TIME, PID, Tmean, SP, DUTY_CURR
# Globals written: SDR, MAX_CORE_TEMP, CPU_TEMP, RPM1..RPM6, DUTY_CPU,
#                  DUTY_DRIVE, DRIVER (DUTY_CURR via adjust_fans)
# Outputs:         "<duty> " on stdout when the duty cycle changed
##############################################
function CPU_check_adjust {
  local core temp readings fan
  # Find hottest CPU. A CPU row is one whose name contains "Temp" and
  # whose 4th '|'-separated column is exactly 3.<n>; the reading is the
  # first word of the 5th column.
  # NOTE(review): presumably 3.1, 3.2 are the per-socket CPU rows on
  # this server - confirm against 'ipmitool sdr elist all'.
  MAX_CORE_TEMP=0
  # IPMITOOL is expanded unquoted on purpose: it carries its own options.
  SDR=$($IPMITOOL sdr elist all)
  for (( core = 1; core <= CORES; core++ )); do
    readings=$(awk -F'|' -v id="3.${core}" '
      $1 ~ /Temp/ {
        entity = $4
        gsub(/[ \t]/, "", entity)
        # Force a string comparison so "3.10" is never equal to "3.1".
        if ((entity "") == (id "")) {
          n = split($5, word, " ")
          if (n > 0) print word[1]
        }
      }' <<< "$SDR")
    while read -r temp; do
      # Ignore anything that is not a plain non-negative integer
      # (missing reading, "Disabled", "No Reading", negative values).
      [[ "$temp" =~ ^[0-9]+$ ]] || continue
      if (( 10#$temp > MAX_CORE_TEMP )); then
        MAX_CORE_TEMP=$(( 10#$temp ))
      fi
    done <<< "$readings"
  done
  # NOTE(review): if no valid reading is found CPU_TEMP stays 0 and the
  # CPU demands only DUTY_MIN - confirm this is acceptable.
  CPU_TEMP=$MAX_CORE_TEMP

  # Fan speeds: every run of 3 to 5 digits on the line naming each fan.
  for fan in 1 2 3 4 5 6; do
    printf -v "RPM${fan}" '%s' \
      "$(grep "Fan${fan}" <<< "$SDR" | grep -Eo '[0-9]{3,5}')"
  done

  DUTY_CPU=$( constrain $(( (CPU_TEMP - CPU_REF) * CPU_SCALE + DUTY_MIN )) )
  if (( FIRST_TIME == 1 )); then return; fi

  local NEW=$DUTY_CPU
  local DUTY_PREV=$DUTY_CURR
  # This allows fans to come down faster after high CPU demand.
  # Adjust DUTY_DRIVE if it will go down (PID<0) and drives are cool
  # (Tmean<<SP), otherwise changes are not good.
  # Tmean is empty when no drive reported a temperature; skip bc then.
  if [[ -n "$Tmean" ]] && (( PID < 0 )) \
    && [[ "$(bc -l <<< "$Tmean < $SP-1")" == 1 ]]; then
    DUTY_DRIVE=$( constrain $(( DUTY_CURR + PID )) )
  fi

  # Take the larger demand and record who is driving the fans.
  if (( DUTY_DRIVE > DUTY_CPU )); then
    NEW=$DUTY_DRIVE
    DRIVER="Drives"
  elif (( DUTY_CPU >= 30 )); then
    DRIVER="AUTO"
  else
    DRIVER="CPU"
  fi
  adjust_fans "$NEW" # sets DUTY_CURR

  # DIAGNOSTIC variables - uncomment for troubleshooting:
  # printf "\nDUTY_DRIVE=%s, DUTY_DRIVER=%s, CPU_TEMP=%s, DUTY_CPU=%s, DUTY_CURR=%s , FAN1=%s " "${DUTY_DRIVE:---}" "$DRIVER" "${CPU_TEMP:---}" "${DUTY_CPU:---}" "${DUTY_CURR:---}" "${RPM1:-----}"
  if (( DUTY_PREV != DUTY_CURR )); then
    printf "%d " "$DUTY_CURR"
  fi
}
##############################################
# function DRIVES_check_adjust
# Print time on new log line.
# Go through each drive, reading its temperature (SMART attribute 194,
# falling back to 190). Drives without a numeric temperature are left
# out of the statistics. Calculate max and mean, the PID terms and the
# duty the drives demand (DUTY_DRIVE). If that is at least DUTY_CPU,
# apply it with adjust_fans. Finally print Tmax and Tmean.
# Globals read:    smartctl, drives, SP, DRIVE_T, Kp, Ki, Kd, DUTY_MIN,
#                  DUTY_CURR, DUTY_CPU
# Globals written: TIME, Tmax, Tsum, Tmean, ERRp, ERRc, ERR, P, I, D,
#                  PID, DUTY_DRIVE, DRIVER (DUTY_CURR via adjust_fans)
##############################################
function DRIVES_check_adjust {
  local drive attrs temp count=0
  echo # start new line
  # print time on each line
  TIME=$(date "+%H:%M:%S"); echo -n "$TIME "
  Tmax=0; Tsum=0 # initialize drive temps for new loop through drives

  # $drives is a space-separated list; word splitting is intended.
  for drive in $drives; do
    # Query the drive once and look for both attributes in the result.
    attrs=$("$smartctl" -A "$drive")
    temp=$(awk '/194 Temperature/ {print $10}' <<< "$attrs")
    if [[ -z "$temp" ]]; then
      temp=$(awk '/190 Airflow_Temperature/ {print $10}' <<< "$attrs")
    fi
    # No usable reading: do not count this drive at all.
    [[ "$temp" =~ ^[0-9]+$ ]] || continue
    temp=$(( 10#$temp ))
    (( Tsum += temp ))
    # Numeric comparison; a string comparison would rank 9 above 35.
    if (( temp > Tmax )); then Tmax=$temp; fi
    (( count += 1 ))
  done

  if (( count == 0 )); then
    # no drive reported a temperature
    Tmean=""; Tmax=""; P=""; D=""
    DUTY_DRIVE=$DUTY_MIN
  else
    # summarize, calculate PID
    Tmean=$(echo "scale=2; $Tsum / $count" | bc)
    ERRp=$ERRc
    # NOTE(review): the error is taken from Tmax although SP is described
    # as a mean-temperature setpoint - confirm this is intended.
    ERRc=$(echo "scale=3; ($Tmax - $SP) / 1" | bc)
    ERR=$(echo "scale=2; $ERRc * $DRIVE_T + $I" | bc)
    P=$(echo "scale=3; ($Kp * $ERRc) / 1" | bc)
    P=$(printf %0.2f "$P") # add leading 0 if needed, 2 dec. places
    I=$(echo "scale=2; ($Ki * $ERR) / 1" | bc)
    D=$(echo "scale=4; $Kd * ($ERRc - $ERRp) / $DRIVE_T" | bc)
    D=$(printf %0.2f "$D") # add leading 0 if needed, 2 dec. places
    PID=$(echo "$P + $I + $D" | bc) # add 3 corrections
    PID=$(printf %0.f "$PID") # round
    # add leading 0 if needed, round for printing
    Tmean=$(printf %0.2f "$Tmean")
    ERRc=$(printf %0.2f "$ERRc")
    DUTY_DRIVE=$( constrain $(( DUTY_CURR + PID )) )
  fi

  # Only touch the fans when the drives demand at least as much as the CPU.
  if (( DUTY_DRIVE >= DUTY_CPU )); then
    adjust_fans "$DUTY_DRIVE"
    DRIVER="Drives"
  elif (( DUTY_CPU >= 30 )); then
    DRIVER="AUTO"
  else
    DRIVER="CPU"
  fi

  # print current Tmax, Tmean
  printf "^%-3s %5s" "${Tmax:---}" "${Tmean:----}"
}
##############################################
# function constrain
# Clamp the duty cycle passed as $1 to the range DUTY_MIN..95 and
# print the result on stdout.
# Globals: DUTY_MIN (read)
##############################################
function constrain {
  local clamped=$1
  # Upper bound first, then the lower bound, as two independent checks.
  if (( clamped > 95 )); then
    clamped=95
  fi
  if (( clamped < DUTY_MIN )); then
    clamped=$DUTY_MIN
  fi
  echo "$clamped"
}
##############################################
# function adjust_fans
# Apply the duty cycle passed as $1 if it differs from DUTY_CURR.
# The value is used as given; it is not constrained here.
#   duty >  30: switch to automatic fan control, unless the previous
#               duty was already above 30 (automatic already active)
#   duty <= 30: switch to manual control and set the requested duty
# Globals: IPMITOOL (read; deliberately expanded unquoted so the options
#          embedded in it become separate words),
#          DUTY_CURR (read and written)
# Arguments: $1 - new duty cycle (%)
##############################################
function adjust_fans {
  local DUTY_NEW=$1
  # Nothing to do when the duty is unchanged.
  if (( DUTY_NEW == DUTY_CURR )); then return 0; fi

  if (( DUTY_NEW > 30 )); then
    # Manual control covers every duty up to and including 30, so the
    # hand-over must also happen when the previous duty was exactly 30.
    if (( DUTY_CURR <= 30 )); then
      # "echo -n" prevents newline generated in log
      echo -n "$($IPMITOOL raw 0x30 0x30 0x01 0x01)"; sleep 1
    fi
  else
    # Manual mode first, then wait a tick before sending the duty.
    # "echo -n" prevents newline generated in log
    echo -n "$($IPMITOOL raw 0x30 0x30 0x01 0x00)"; sleep 1
    echo -n "$($IPMITOOL raw 0x30 0x30 0x02 0xff "$DUTY_NEW")"
  fi
  DUTY_CURR=$DUTY_NEW
}
#####################################################
# SETUP
# All this happens only at the beginning
# Initializing values, list of drives, print header
#####################################################
# Print settings at beginning of log
# (SP, Kp, Ki, Kd, DRIVE_T and CPU_T are set at the top of this script;
# CPU_REF, CPU_SCALE and DUTY_MIN are not assigned anywhere in it and are
# expected from the sourced host.config.)
printf "\n****** SETTINGS ******\n"
printf "Drive temperature setpoint (C): %s\n" $SP
printf "Kp=%s, Ki=%s, Kd=%s\n" $Kp $Ki $Kd
printf "Drive check interval (main cycle; minutes): %s\n" $DRIVE_T
printf "CPU check interval (seconds): %s\n" $CPU_T
printf "CPU reference temperature (C): %s\n" $CPU_REF
printf "CPU scalar: %s\n" $CPU_SCALE
printf "Fan minimum duty cycle: %s\n" $DUTY_MIN
# Set number of CPU sockets to check for temperature
# (CPU_check_adjust loops from 1 to CORES)
CORES=2
# Start from the minimum duty; this is the value sent to the fans below.
DUTY_CURR=$DUTY_MIN
CPU_LOOPS=$( echo "$DRIVE_T * 60 / $CPU_T" | bc ) # Number of whole CPU loops per drive loop
I=0; ERRc=0 # Initialize errors to 0
# While FIRST_TIME is 1, CPU_check_adjust only measures and computes
# DUTY_CPU; it does not touch the fans.
FIRST_TIME=1
# Get list of drives
drives=$(get_smart_drives)
read_fan_data # get fan status before making any adjustments
# Set mode to 'Manual' to avoid BMC changing duty cycle
# Need to wait a tick or it may not get next command
# "echo -n" to avoid annoying newline generated in log
# NOTE(review): nothing in this script restores automatic fan control when
# the script stops - confirm that is handled elsewhere.
echo -n "$($IPMITOOL raw 0x30 0x30 0x01 0x00)"; sleep 1
echo -n "$($IPMITOOL raw 0x30 0x30 0x02 0xff "$DUTY_CURR")"; sleep 1
# DUTY_DRIVE NEEDS initial value. Use DUTY_CURR unless it is
# very high and would take a long time to equilibrate.
# (or doesn't exist; second test true if it exists)
if [[ $DUTY_CURR -lt 50 && -n ${DUTY_CURR+x} ]]; then
DUTY_DRIVE=$DUTY_CURR
else
DUTY_DRIVE=50
fi
print_header
# First CPU reading: provides CPU_TEMP and DUTY_CPU before the main loop
# starts (FIRST_TIME is still 1, so no fan adjustment is made).
CPU_check_adjust
###########################################
# Main loop through drives every DRIVE_T minutes
# and CPU every CPU_T seconds
# Runs forever; each pass writes one log line.
###########################################
while true ; do
# Print header every quarter day. Expression removes any
# leading 0 so it is not seen as octal
# HM is the time of day as the number HHMM (e.g. 0605 becomes 605).
HM=$(date +%k%M)
HM=$( echo $HM | awk '{print $1 + 0}' )
R=$(( HM % 600 )) # remainder after dividing by 6 hours
# R is below DRIVE_T only in the first DRIVE_T minutes after
# 00:00, 06:00, 12:00 and 18:00.
if (( R < DRIVE_T )); then
print_header;
fi
# Remember the duty before the drive check so the log can show Prev/New.
DUTY_PREV=$DUTY_CURR
DRIVES_check_adjust
# Columns: ERRc, P, I, D, CPU temp, driver, previous/new duty.
# Empty values are printed as dashes.
printf "%6s %6s %5s %6.6s %3d %-6s %2d/%-5d " "${ERRc:----}" "${P:----}" $I "${D:----}" "$CPU_TEMP" $DRIVER "$DUTY_PREV" "$DUTY_CURR"
# After the first pass, wait 5 seconds and re-read the fan speeds before
# logging RPM; on the first pass the values read during setup are used.
if [[ $FIRST_TIME -eq 0 ]]; then
sleep 5
read_fan_data
fi
FIRST_TIME=0
printf "%4d " "$RPM1"
# CPU checks fill the time until the next drive check:
# CPU_LOOPS iterations of CPU_T seconds each.
i=0
while [ $i -lt "$CPU_LOOPS" ]; do
CPU_check_adjust
sleep $CPU_T
let i=i+1
done
done
# For Dell r720 with one fan zone.
# Adjusts fans based on drive and CPU temperatures.
# Includes disks on motherboard and on HBA.
# The script compares the cooling demand of drives and
# CPU and uses whichever is greater.
# Mean drive temp is maintained at a setpoint using a PID algorithm.
# CPU temp need not and cannot be maintained at a setpoint,
# so PID is not used; instead fan duty cycle demand is simply
# increased with temp using reference and scale settings.
# If CPU fan speed is above 30%, switch to auto fan control.
# Drives are checked and fans adjusted on a set interval, such as 5 minutes.
# Logging is done at that point. CPU temps can spike much faster,
# so are checked and logged at a shorter interval, such as 1-25 seconds.
# CPUs with high TDP probably require short intervals.
# Logs:
# - Max and mean disk temperature
# - Temperature error and PID variables
# - CPU temperature
# - Previous and new duty cycle
# - Fan mode (should always be FULL after first line)
# - RPM for FAN1 after new duty cycle
# - Interim adjustments due to CPU demand
# Relation between percent duty cycle, hex value of that number,
# and RPMs for my fans. RPM will vary among fans, is not
# precisely related to duty cycle, and does not matter to the script.
# It is merely reported.
# Tuning suggestions
# PID tuning advice on the internet generally does not work well in this application.
# First run the script spincheck.sh and get familiar with your temperature and fan variations without any intervention.
# Choose a setpoint that is an actual observed Tmean, given the number of drives you have. It should be the Tmean associated with the Tmax that you want.
# Set Ki=0 and leave it there. You probably will never need it.
# Start with Kp low. Use a value that results in a rounded correction=1 when error is the lowest value you observe other than 0 (i.e., when ERRc is minimal, Kp ~= 1 / ERRc)
# Set Kd at about Kp*10
# Get Tmean within ~0.3 degree of SP before starting script.
# Start script and run for a few hours or so. If Tmean oscillates (best to graph it), you probably need to reduce Kd. If no oscillation but response is too slow, raise Kd.
# Stop script and get Tmean at least 1 C off SP. Restart. If there is overshoot and it goes through some cycles, you may need to reduce Kd.
# If you have problems, examine the P and D columns in the log and see which is messing you up. If all else fails you can try Ki. If you use Ki, make it small, ~ 0.1 or less.
Here is my config file, which is used with this script and with the ESXi host scripts for startup and shutdown of VMs hosted on FreeNAS.
Code:
################################################################################
################################################################################
#
# Host-specific settings, selected by the short hostname of this machine.
#
# freenashost : short hostname returned by 'hostname -s'
# esxihost    : user-defined
# ipmihost    : user-defined
# datastores  : user-defined
# logdir      : user-defined (and must exist before use)
# ipmiuser    : user-defined
# ipmipw      : user-defined
# IPMITOOL    : complete ipmitool command line built from the values above;
#               meant to be expanded unquoted by the caller
# DUTY_MIN    : fan minimum duty cycle (%) (to avoid stalling)
# CPU_REF     : reference temperature (C) for scaling CPU_DUTY (NOT a
#               setpoint); at and below it the CPU demands DUTY_MIN.
#               Integer only!
# CPU_SCALE   : percentage points of additional duty cycle demanded by the
#               CPU for each degree above CPU_REF. Integer only!
#
# An unknown hostname prints "Undefined system!" and exits with status 1.
#
# NOTE(review): the password is embedded in the command line held in
# IPMITOOL - confirm that this exposure is acceptable.
#
################################################################################
################################################################################
freenashost=$(hostname -s)

# Edit to suit your needs and environment
# This is an example, showing the setup for these AIO systems:
#
# FreeNAS host 'freenas' running on ESXi host 'ESXi_Host' with datastore 'VM_DATASET'
# FreeNAS host 'freenas1' running on ESXi host 'ESXi_Host1' with datastore 'VM_DATASET'
#
# Configured to store logs in unique directories specified by 'logdir'
case "${freenashost}" in
  freenas)
    esxihost=IP_esxi_host
    ipmihost=IP_iDRAC
    datastores="VM_DATASET"
    logdir=/mnt/Storage/scripts/logs
    ipmiuser=user
    ipmipw=password
    IPMITOOL="/usr/local/bin/ipmitool -I lanplus -H ${ipmihost} -U ${ipmiuser} -P ${ipmipw}"
    DUTY_MIN=10
    CPU_REF=70
    CPU_SCALE=4
    ;;
  freenas1)
    esxihost=IP_esxi_host
    ipmihost=IP_iDRAC
    datastores="VM_DATASET"
    logdir=/mnt/Storage/scripts/logs
    ipmiuser=user
    ipmipw=password
    IPMITOOL="/usr/local/bin/ipmitool -I lanplus -H ${ipmihost} -U ${ipmiuser} -P ${ipmipw}"
    DUTY_MIN=2
    CPU_REF=55
    CPU_SCALE=2
    ;;
  *)
    echo "Undefined system!"
    exit 1
    ;;
esac
Here is the script that I have set up for my Dell R720xd servers.
The one with dual E5-2640s is 'freenas1' and the one with dual E5-2690s is 'freenas'.
The e5-2640 is fairly idle and is only used as a backup target, so it runs the fans fairly slow. It also seems to need less than 30% duty cycle on the fans to keep it cool even at 100% cpu usage.
The E5-2690 is used for the main FreeNAS server and also runs Blue Iris with 10 cameras and a Plex server. Its CPUs run hotter, so I control the fans until the demand reaches the 30% fan duty cycle and then let the iDRAC adjust from there (30% duty cycle is the minimum that iDRAC runs the fans at on my system).
I may work some more on the fan speeds for the 2 systems, but so far they seem to be running fine and are quieter than they were with the default iDRAC settings after installing the HBA.