Details
-
Bug
-
Resolution: Fixed
-
Critical
-
Cheshire-Cat
-
Untriaged
-
1
-
Unknown
Description
When menelaus_web_alerts:check(disk, ...) checks whether any filesystems of interest (the ones we use) are over capacity, a crash may occur. The crash happens when the filesystem of interest is not in the results of the "df -alk" command.
Here's some data from a customer case.
# curl -u username:password localhost:8091/diag/eval -d '{[ns_storage_conf:this_node_dbdir(), ns_storage_conf:this_node_ixdir(),ns_storage_conf:this_node_logdir(), ns_audit_cfg:get_log_path()], ns_disksup:get_disk_data()}.'
|
{[{ok,"/CouchData"},
|
{ok,"/CouchIndex"},
|
{ok,"/opt/couchbase/var/lib/couchbase/ns_log"},
|
{ok,"/GDlogs/couchbase"}],
|
[{"/dev",98913024,0},
|
{"/Envelope",412716456,21},
|
{"/GDlogs",412716456,21},
|
{"/Software",2064104848,39},
|
{"/CouchIndex",30832548,8},
|
{"/tmp",20511312,1},
|
{"/CouchData",51474912,2},
|
{"/Database",103080888,8},
|
{"/GDdata",2064104848,39},
|
{"/conf",2064104848,39},
|
{"/var/TimesSquare",103080888,8},
|
{"/etc/localtime",4513440,32},
|
{"/dev/shm",65536,0},
|
{"/etc/hostname",20511312,21},
|
{"/run/baseball",20511312,21},
|
{"/sys/fs/cgroup",98913024,0},
|
{"/run",65536,1},
|
{"/run/lock",65536,1},
|
{"/var/log/journal",65536,0},
|
{"/var/log/journal/3a83408691bbff65bd07a7a92c59f729",51474912,18},
|
{"/run/user/0",19782608,0}]}
|
and
# /bin/df -alk
|
Filesystem 1K-blocks Used Available Use% Mounted on
|
proc 0 0 0 - /proc
|
tmpfs 98913024 0 98913024 0% /dev
|
devpts 0 0 0 - /dev/pts
|
sysfs 0 0 0 - /sys
|
mqueue 0 0 0 - /dev/mqueue
|
/dev/mapper/vg01-GDlogs 412716456 81269888 313583732 21% /Envelope
|
/dev/mapper/vg01-GDlogs 412716456 81269888 313583732 21% /GDlogs
|
/dev/mapper/vg01-GDdata 2064104848 762479224 1215606908 39% /Software
|
/dev/mapper/vg01-CouchIndex 30832548 2067672 27175628 8% /CouchIndex
|
/dev/mapper/vg01-GDtmp 20511312 130248 19316104 1% /tmp
|
/dev/mapper/vg01-CouchData 51474912 562636 48274452 2% /CouchData
|
/dev/mapper/vg01-Database 103080888 7204952 90616672 8% /Database
|
/dev/mapper/vg01-GDdata 2064104848 762479224 1215606908 39% /GDdata
|
/dev/mapper/vg01-GDdata 2064104848 762479224 1215606908 39% /conf
|
/dev/mapper/vg01-Database 103080888 7204952 90616672 8% /var/TimesSquare
|
/dev/sda3 4513440 1344920 2916208 32% /etc/localtime
|
shm 65536 0 65536 0% /dev/shm
|
/dev/mapper/vg01-var_lib_docker 20511312 3980032 15466320 21% /etc/hostname
|
/dev/mapper/vg01-var_lib_docker 20511312 3980032 15466320 21% /run/baseball
|
/dev/mapper/vg01-var_lib_docker 20511312 3980032 15466320 21% /home/autoinstall/GD_Load
|
tmpfs 98913024 0 98913024 0% /sys/fs/cgroup
|
cgroup 0 0 0 - /sys/fs/cgroup/systemd
|
cgroup 0 0 0 - /sys/fs/cgroup/net_cls,net_prio
|
cgroup 0 0 0 - /sys/fs/cgroup/hugetlb
|
cgroup 0 0 0 - /sys/fs/cgroup/devices
|
cgroup 0 0 0 - /sys/fs/cgroup/blkio
|
cgroup 0 0 0 - /sys/fs/cgroup/cpu,cpuacct
|
cgroup 0 0 0 - /sys/fs/cgroup/pids
|
cgroup 0 0 0 - /sys/fs/cgroup/perf_event
|
cgroup 0 0 0 - /sys/fs/cgroup/cpuset
|
cgroup 0 0 0 - /sys/fs/cgroup/freezer
|
cgroup 0 0 0 - /sys/fs/cgroup/memory
|
tmpfs 65536 264 65272 1% /run
|
tmpfs 65536 8 65528 1% /run/lock
|
tmpfs 65536 0 65536 0% /var/log/journal
|
/dev/mapper/vg01-var_log 51474912 8682068 40553524 18% /var/log/journal/3a83408691bbff65bd07a7a92c59f729
|
cgroup 0 0 0 - /sys/fs/cgroup/systemd
|
cgroup 0 0 0 - /sys/fs/cgroup/systemd/system.slice/docker-3a83408691bbff65bd07a7a92c59f729354c9a5284ba5b655111fb13e572c07b.scope
|
systemd-1 - - - - /proc/sys/fs/binfmt_misc
|
configfs 0 0 0 - /sys/kernel/config
|
debugfs 0 0 0 - /sys/kernel/debug
|
sunrpc 0 0 0 - /var/lib/nfs/rpc_pipefs
|
binfmt_misc 0 0 0 - /proc/sys/fs/binfmt_misc
|
tmpfs 19782608 0 19782608 0% /run/user/0
|
Note that for "/opt/couchbase/var/lib/couchbase/ns_log" the mountpoint "/" is not in the "df -alk" output. From the couchbase.log for the case, we see that it is listed in the "df -ha" result:
Filesystem
|
df -ha
|
==============================================================================
|
Filesystem Size Used Avail Use% Mounted on
|
/dev/mapper/docker-253:2-786433-de8ca3a99f2730c1189230d8682dd88c5905c0c618f5da5917804fd7d5552a8d 10G 4.8G 5.3G 48% /
|
and also /proc/mounts (it looks to have two "/" mountpoints).
Raw /proc/mounts
|
cat /proc/mounts
|
==============================================================================
|
rootfs / rootfs rw 0 0
|
/dev/mapper/docker-253:2-786433-de8ca3a99f2730c1189230d8682dd88c5905c0c618f5da5917804fd7d5552a8d / xfs rw,relatime,nouuid,attr2,inode64,sunit=256,swidth=256,noquota 0 0
|
The code that crashes is in menelaus_web_alerts:check(disk, ...):
check(disk, Opaque, _History, _Stats) -> |
|
Mounts = ns_disksup:get_disk_data(), |
|
UsedPre = [ns_storage_conf:this_node_dbdir(), |
ns_storage_conf:this_node_ixdir(), |
ns_storage_conf:this_node_logdir(), |
ns_audit_cfg:get_log_path()], |
UsedFiles = [X || {ok, X} <- UsedPre], |
|
RealPaths = [misc:realpath(File, "/") || File <- UsedFiles], |
|
UsedMountsTmp = |
[begin {ok, Mnt} = ns_storage_conf:extract_disk_stats_for_path(Mounts, RealFile), |
Mnt |
end || {ok, RealFile} <- RealPaths], |
where ns_storage_conf:extract_disk_stats_for_path returns none, so the {ok, Mnt} pattern match fails with a badmatch error.