Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ATA disk mapping, zpool check, netdev check, and a config file #15

Merged
merged 26 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
47bc5ed
support zpool checking in ugreen-diskiomon
miskcoo Jun 20, 2024
45421b1
remove unnecessary script for checking zpool status
miskcoo Jun 20, 2024
9e6d9ac
fix a typo
miskcoo Jun 20, 2024
5c621c0
fix a typo
miskcoo Jun 20, 2024
e87c026
a todo comment for unhealthy zpool
miskcoo Jun 20, 2024
a1bc696
add the disk mapping method by ATA
miskcoo Jun 20, 2024
4272941
read disk serial from env
miskcoo Jun 20, 2024
a84ca1e
add a config file (#14)
miskcoo Jun 20, 2024
0592c3b
fix: remove the trailing slash in the output of ls -ahl
miskcoo Jun 20, 2024
5743afd
add comments about the config file
miskcoo Jun 20, 2024
3377233
refine the ata mapping
miskcoo Jun 20, 2024
f553e84
move the disk online check outside the activities checking loop
miskcoo Jun 20, 2024
10b4439
add configs for netdevmon
miskcoo Jun 20, 2024
46dd3e2
Merge branch 'zpool-monitoring' into ata-disk-mapping
miskcoo Jun 20, 2024
a37dcea
add configs for zpool monitoring
miskcoo Jun 20, 2024
aa26e71
make the color configurable
miskcoo Jun 20, 2024
78693c5
support configuring the netdev color, and monitoring the gw / link st…
miskcoo Jun 20, 2024
49309ec
update the systemd services and readme files
miskcoo Jun 20, 2024
90e530c
fix: null index in array
miskcoo Jun 21, 2024
60ef585
mapping array for DXP4800 series
miskcoo Jun 21, 2024
c98bde4
remove unnecessary sleep when module is loaded
miskcoo Jun 21, 2024
bc2bb10
update readme
miskcoo Jun 21, 2024
344a45f
further description about the conf
miskcoo Jun 21, 2024
5c9c22e
config for the brightness of leds
miskcoo Jun 21, 2024
f58940a
change the color of some messages
miskcoo Jun 21, 2024
7d7ad13
update readme
miskcoo Jun 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 172 additions & 58 deletions scripts/ugreen-diskiomon
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exit-ugreen-diskiomon() {
rm "/var/run/ugreen-diskiomon.lock"
fi
kill $smart_check_pid 2>/dev/null
kill $zpool_check_pid 2>/dev/null
kill $disk_online_check_pid 2>/dev/null
}

# trap exit and remove lockfile
Expand All @@ -23,42 +25,50 @@ if [[ -f /boot/config/plugins/ugreenleds-driver/settings.cfg ]]; then
source /boot/config/plugins/ugreenleds-driver/settings.cfg
fi

# load environment variables
if [[ -f /etc/ugreen-leds.conf ]]; then
source /etc/ugreen-leds.conf
fi

# led-disk mapping (see https://github.com/miskcoo/ugreen_dx4600_leds_controller/pull/4)
MAPPING_METHOD=${MAPPING_METHOD:=hctl} # hctl, serial
MAPPING_METHOD=${MAPPING_METHOD:=ata} # ata, hctl, serial
led_map=(disk1 disk2 disk3 disk4 disk5 disk6 disk7 disk8)

# hctl, $> lsblk -S -x hctl -o hctl,serial,name
# NOTE: It is reported that the order below should be adjusted for each model.
# Please check the disk mapping section in https://github.com/miskcoo/ugreen_dx4600_leds_controller/blob/master/README.md.
hctl_map=("0:0:0:0" "1:0:0:0" "2:0:0:0" "3:0:0:0" "4:0:0:0" "5:0:0:0" "6:0:0:0" "7:0:0:0")
# serial number, $> lsblk -S -x hctl -o hctl,serial,name
serial_map=(${DISK_SERIAL})
# ata number, $> ls -l /sys/block | grep -E 'ata[0-9]+'
ata_map=("ata1" "ata2" "ata3" "ata4" "ata5" "ata6" "ata7" "ata8")

if which dmidecode; then
product_name=$(dmidecode --string system-product-name)
case "${product_name}" in
"DXP6800 Pro")
echo "Found UGREEN DXP6800 Pro"
hctl_map=("2:0:0:0" "3:0:0:0" "4:0:0:0" "5:0:0:0" "0:0:0:0" "1:0:0:0")
;;
"DX4600 Pro")
echo "Found UGREEN DX4600 Pro"
# using the default mapping
;;
"DXP8800 Plus")
echo "Found UGREEN DXP8800 Plus"
# using the default mapping
;;
DXP6800*) # tested on DXP6800 Pro
echo "Found UGREEN DXP6800 series"
hctl_map=("2:0:0:0" "3:0:0:0" "4:0:0:0" "5:0:0:0" "0:0:0:0" "1:0:0:0")
ata_map=("ata3" "ata4" "ata5" "ata6" "ata1" "ata2")
;;
DX4600*) # tested on DX4600 Pro
echo "Found UGREEN DX4600 series"
;;
DXP8800*) # tested on DXP8800 Plus
echo "Found UGREEN DXP8800 series"
# using the default mapping
;;
*)
if [[ "${MAPPING_METHOD}" = "hctl" ]]; then
echo "Using the default HCTL order. Please check it maps to your disk slots correctly."
echo "If you confirm that the HCTL order is correct, or find it is different, you can submit an issue to let us know, so we can update the script."
echo "(Read the disk mapping section in https://github.com/miskcoo/ugreen_dx4600_leds_controller/blob/master/README.md for more details)"
fi
;;
if [[ "${MAPPING_METHOD}" = "hctl" ]]; then
echo "Using the default HCTL order. Please check it maps to your disk slots correctly."
echo "If you confirm that the HCTL order is correct, or find it is different, you can submit an issue to let us know, so we can update the script."
echo "(Read the disk mapping section in https://github.com/miskcoo/ugreen_dx4600_leds_controller/blob/master/README.md for more details)"
fi
;;
esac
elif [[ "${MAPPING_METHOD}" = "hctl" ]]; then
# Single quotes keep the backticks literal. Inside double quotes the shell
# treats `dmidecode` as a command substitution, i.e. it tries to run the very
# tool that was just found to be missing and drops its name from the message.
echo 'installing the tool `dmidecode` is suggested; otherwise the script cannot detect your device and adjust the hctl_map'
fi
# serial number, $> lsblk -S -x hctl -o hctl,serial,name
serial_map=("placeholder0" "placeholder1" "placeholder2" "placeholder3" "placeholder4" "placeholder5" "placeholder6" "placeholder7")
declare -A devices

# set monitor SMART information to true by default if not running unRAID
Expand All @@ -72,46 +82,73 @@ CHECK_SMART_INTERVAL=${CHECK_SMART_INTERVAL:=360}
# refresh interval of the disk LEDs, in seconds
LED_REFRESH_INTERVAL=${LED_REFRESH_INTERVAL:=0.1}

# whether to check zpool health
CHECK_ZPOOL=${CHECK_ZPOOL:=false}
# polling rate for checking zpool health. 5 seconds by default
CHECK_ZPOOL_INTERVAL=${CHECK_ZPOOL_INTERVAL:=5}

# polling rate for checking disk online. 5 seconds by default
CHECK_DISK_ONLINE_INTERVAL=${CHECK_DISK_ONLINE_INTERVAL:=5}

COLOR_DISK_HEALTH=${COLOR_DISK_HEALTH:="255 255 255"}
COLOR_DISK_UNAVAIL=${COLOR_DISK_UNAVAIL:="255 0 0"}
COLOR_ZPOOL_FAIL=${COLOR_ZPOOL_FAIL:="255 0 0"}
COLOR_SMART_FAIL=${COLOR_SMART_FAIL:="255 0 0"}

{ lsmod | grep ledtrig_oneshot ; } || modprobe -v ledtrig_oneshot

{ lsmod | grep ledtrig_oneshot > /dev/null; } || modprobe -v ledtrig_oneshot

sleep 2

#######################################
# Print one line per disk in the form "<block-device> <key> ...", where the
# key type is selected by MAPPING_METHOD.
# Globals:
#   MAPPING_METHOD (read) - one of: ata, hctl, serial
# Arguments:
#   $1 - directory holding the block-device symlinks scanned by the ata
#        method (optional, defaults to /sys/block)
# Outputs:
#   ata:         "<name> ataN" for every symlink whose target path contains
#                an ataN component; <name> is the last component of the target
#   hctl/serial: the `lsblk -S -o name,<method>,tran` rows that contain "sata"
#   otherwise:   an error message on stderr (stdout stays empty so that a
#                caller capturing the output never parses the message as a
#                disk entry)
# Exits:
#   1 when MAPPING_METHOD is not supported
#######################################
function disk_enumerating_string() {
  local sys_block_dir=${1:-/sys/block}
  local entry target

  case "$MAPPING_METHOD" in
    ata)
      # Resolve the symlinks directly instead of parsing `ls -l` output.
      for entry in "$sys_block_dir"/*; do
        [[ -L $entry ]] || continue
        target=$(readlink -- "$entry") || continue
        # The leftmost ataN in the target path identifies the ATA port.
        if [[ $target =~ ata[0-9]+ ]]; then
          printf '%s %s\n' "${target##*/}" "${BASH_REMATCH[0]}"
        fi
      done
      ;;
    hctl | serial)
      lsblk -S -o "name,${MAPPING_METHOD},tran" | grep sata
      ;;
    *)
      echo "Unsupported mapping method: ${MAPPING_METHOD}" >&2
      exit 1
      ;;
  esac
}

echo Enumerating disks based on $MAPPING_METHOD...
declare -A dev_map
while read line
do
blk_line=($line)
if [[ $MAPPING_METHOD = hctl ]]; then
key=${blk_line[1]}
val=${blk_line[0]}
elif [[ $MAPPING_METHOD = serial ]]; then
key=${blk_line[1]}
val=${blk_line[0]}
else
echo Unsupported mapping method: ${MAPPING_METHOD}
exit 1
fi
key=${blk_line[1]}
val=${blk_line[0]}
dev_map[${key}]=${val}
echo $MAPPING_METHOD ${key} ">>" ${dev_map[${key}]}
done <<< "$(lsblk -S -o name,${MAPPING_METHOD} | tail -n +2)"
done <<< "$(disk_enumerating_string)"

# initialize LEDs
declare -A dev_to_led_map
for i in "${!led_map[@]}"; do
led=${led_map[i]}
if [[ -d /sys/class/leds/$led ]]; then
echo oneshot > /sys/class/leds/$led/trigger
echo 1 > /sys/class/leds/$led/invert
echo 100 > /sys/class/leds/$led/delay_on
echo 100 > /sys/class/leds/$led/delay_off
echo "255 255 255" > /sys/class/leds/$led/color
echo "$COLOR_DISK_HEALTH" > /sys/class/leds/$led/color

# find corresponding device
_tmp_str=${MAPPING_METHOD}_map[@]
_tmp_arr=(${!_tmp_str})
dev=${dev_map[${_tmp_arr[i]}]}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here I get two errors: (because I only have 6 and not 8 disk slots)
/usr/bin/ugreen-diskiomon: line 147: dev_map: bad array subscript
/usr/bin/ugreen-diskiomon: line 147: dev_map: bad array subscript

DEBUG: _tmp_arr[0]='ata3'
DEBUG: _tmp_arr[1]='ata4'
DEBUG: _tmp_arr[2]='ata5'
DEBUG: _tmp_arr[3]='ata6'
DEBUG: _tmp_arr[4]='ata1'
DEBUG: _tmp_arr[5]='ata2'
DEBUG: _tmp_arr[6]=''
DEBUG: _tmp_arr[7]=''

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is fixed now


if [[ -f /sys/class/block/${dev}/stat ]]; then
devices[$led]=${dev}
dev_to_led_map[$dev]=$led
else
# turn off the led if no disk installed on this slot
echo 0 > /sys/class/leds/$led/brightness
Expand All @@ -120,19 +157,94 @@ for i in "${!led_map[@]}"; do
fi
done

# construct zpool device mapping
declare -A zpool_ledmap
if [ "$CHECK_ZPOOL" = true ]; then
echo Enumerating zpool devices...
while read line
do
zpool_dev_line=($line)
zpool_dev_name=${zpool_dev_line[0]}
zpool_scsi_dev_name="unknown"
# zpool_dev_state=${zpool_dev_line[1]}
case "$zpool_dev_name" in
sd*)
# remove the trailing partition number
zpool_scsi_dev_name=$(echo $zpool_dev_name | sed 's/[0-9]*$//')
;;
dm*)
# find the underlying block device of the encrypted device
dm_slaves=($(ls /sys/block/${zpool_dev_name}/slaves))
zpool_scsi_dev_name=${dm_slaves[0]}
;;
*)
echo Unsupported zpool device type ${zpool_dev_name}.
;;
esac

# if the detected scsi device can be found in the mapping array
#echo zpool $zpool_dev_name ">>" $zpool_scsi_dev_name ">>" ${dev_to_led_map[${zpool_scsi_dev_name}]}
if [[ -v "dev_to_led_map[${zpool_scsi_dev_name}]" ]]; then
zpool_ledmap[$zpool_dev_name]=${dev_to_led_map[${zpool_scsi_dev_name}]}
echo "zpool device" $zpool_dev_name ">>" $zpool_scsi_dev_name ">> LED:"${zpool_ledmap[$zpool_dev_name]}
fi
done <<< "$(zpool status -L | egrep ^\\s*\(sd\|dm\))"

#######################################
# Poll `zpool status -L` forever and switch the LED of every mapped pool
# member that is not ONLINE to the failure colour.
# Globals:
#   zpool_ledmap          (read) - zpool device name -> LED name
#   COLOR_DISK_HEALTH     (read) - colour an LED shows while it is healthy
#   COLOR_ZPOOL_FAIL      (read) - colour written when a member is not ONLINE
#   CHECK_ZPOOL_INTERVAL  (read) - seconds to sleep between two polls
# Outputs:
#   One "Disk failure detected ..." line on stdout per newly flagged device.
# Returns:
#   Never returns; intended to run as a background job.
#######################################
function zpool_check_loop() {
  local led zpool_dev_name zpool_dev_state

  while true; do
    # The first two columns of a status row are the device name and its state.
    while read -r zpool_dev_name zpool_dev_state _; do

      # TODO: do something if the pool is unhealthy?

      # Ignore blank rows and pool members that have no LED assigned.
      if [[ -z "${zpool_dev_name}" ]] || [[ ! -v "zpool_ledmap[${zpool_dev_name}]" ]]; then
        continue
      fi
      led=${zpool_ledmap[$zpool_dev_name]}

      # The LED no longer shows the healthy colour, so a problem has already
      # been flagged on it; leave that colour untouched.
      if [[ "$(cat "/sys/class/leds/$led/color")" != "$COLOR_DISK_HEALTH" ]]; then
        continue
      fi

      if [[ "${zpool_dev_state}" != "ONLINE" ]]; then
        echo "$COLOR_ZPOOL_FAIL" > "/sys/class/leds/$led/color"
        # Report the pool member itself, not an unrelated leftover $dev.
        echo "Disk failure detected on /dev/${zpool_dev_name} at $(date +%Y-%m-%d' '%H:%M:%S)"
      fi

      # The LED is never switched back to the healthy colour here:
      # to recover from an error, restart the script.
    done < <(zpool status -L | grep -E '^[[:space:]]*(sd|dm)')

    sleep "${CHECK_ZPOOL_INTERVAL}s"
  done
}

zpool_check_loop &
zpool_check_pid=$!
fi

# check disk health if enabled
if [ "$CHECK_SMART" = true ]; then
(
while true; do
for led in "${!devices[@]}"; do
if [[ "$(cat /sys/class/leds/$led/color)" = "255 0 0" ]]; then
if [[ "$(cat /sys/class/leds/$led/color)" != "$COLOR_DISK_HEALTH" ]]; then
continue;
fi

dev=${devices[$led]}

if [[ -z $(smartctl -H /dev/${dev} | grep PASSED) ]]; then
echo "255 0 0" > /sys/class/leds/$led/color
echo "$COLOR_SMART_FAIL" > /sys/class/leds/$led/color
echo Disk failure detected on /dev/$dev at $(date +%Y-%m-%d' '%H:%M:%S)
continue
fi
Expand All @@ -143,40 +255,42 @@ if [ "$CHECK_SMART" = true ]; then
smart_check_pid=$!
fi

# check for zpool-leds.sh and set variable
if [[ -f /usr/bin/zpool-leds.sh ]]; then
ZPOOL_LEDS="bash /usr/bin/zpool-leds.sh"
else
ZPOOL_LEDS=""
fi
# check disk online status
# Background watcher: every CHECK_DISK_ONLINE_INTERVAL seconds it walks the
# LED -> block device pairs stored in `devices` and flags every disk whose
# sysfs stat file has disappeared.
(
while true; do
for led in "${!devices[@]}"; do
dev=${devices[$led]}

# the LED no longer shows the healthy colour, i.e. some non-healthy colour
# has already been written to it; do not overwrite it and do not report again
if [[ "$(cat /sys/class/leds/$led/color)" != "$COLOR_DISK_HEALTH" ]]; then
continue;
fi

# a missing stat file is taken to mean that the disk went offline
if [[ ! -f /sys/class/block/${dev}/stat ]]; then
echo "$COLOR_DISK_UNAVAIL" > /sys/class/leds/$led/color 2>/dev/null
echo Disk /dev/$dev went offline at $(date +%Y-%m-%d' '%H:%M:%S)
continue
fi
done
sleep ${CHECK_DISK_ONLINE_INTERVAL}s
done
) &
# remember the watcher's PID; exit-ugreen-diskiomon kills $disk_online_check_pid
disk_online_check_pid=$!

# monitor disk activities
declare -A diskio_data_rw
while true; do
for led in "${!devices[@]}"; do
dev=${devices[$led]}
diskio_old_rw="${diskio_data_rw[$led]}"

if [[ "$(cat /sys/class/leds/$led/color)" = "255 0 0" ]]; then
continue;
fi

if [[ ! -f /sys/class/block/${dev}/stat ]]; then
echo "255 0 0" > /sys/class/leds/$led/color 2>/dev/null
echo Disk /dev/$dev went offline at $(date +%Y-%m-%d' '%H:%M:%S)
continue
fi

diskio_new_rw="$(cat /sys/block/${dev}/stat)"
# if $dev does not exist, diskio_new_rw="", which will be safe
diskio_new_rw="$(cat /sys/block/${devices[$led]}/stat 2>/dev/null)"

if [ "${diskio_old_rw}" != "${diskio_new_rw}" ]; then
if [ "${diskio_data_rw[$led]}" != "${diskio_new_rw}" ]; then
echo 1 > /sys/class/leds/$led/shot
fi

diskio_data_rw[$led]=$diskio_new_rw
done

${ZPOOL_LEDS}

sleep ${LED_REFRESH_INTERVAL}s

done
Loading