-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCheckQuota.sh
More file actions
executable file
·86 lines (67 loc) · 3.02 KB
/
CheckQuota.sh
File metadata and controls
executable file
·86 lines (67 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# File : CheckQuota.sh
# Author : Anton Riedel <anton.riedel@tum.de>
# Date : 01.12.2021
# Last Modified Date: 03.03.2022
# Last Modified By : Anton Riedel <anton.riedel@tum.de>
# check if we can submit another masterjob to grid
# Abort early when config.json is not present in the current working directory.
[ ! -f config.json ] && echo "No config file!!!" && exit 1

# get variables from config file
# NOTE: `jq -r` prints the literal string "null" when a key is absent.
ThresholdActiveSubjobs="$(jq -r '.misc.ThresholdActiveSubjob' config.json)"
ThresholdRunningTime="$(jq -r '.misc.ThresholdRunningTime' config.json)"
ThresholdCpuCost="$(jq -r '.misc.ThresholdCpuCost' config.json)"

# The thresholds are used as operands of integer `-gt` tests later in this
# script. A missing key ("null"), an empty value or any other non-integer would
# only surface there as a test error that evaluates to false, so reject such
# values right here with a clear message instead.
for ThresholdName in ThresholdActiveSubjobs ThresholdRunningTime ThresholdCpuCost; do
  if ! [[ "${!ThresholdName}" =~ ^-?[0-9]+$ ]]; then
    echo "Invalid or missing $ThresholdName in config.json: '${!ThresholdName}'"
    exit 1
  fi
done
unset ThresholdName

# variables (globals filled by GetQuota; initialised so they are always defined)
ActiveSubjobs="0"
RunningTime="0"
CPUCost="0"
MasterjobsNotReady="0"
MasterjobsInError="0"
DiskSpace="0"
#######################################
# Collect the current grid usage figures into global variables.
# Globals (written):
#   ActiveSubjobs      - number of lines printed by `alien_ps -X`
#   RunningTime        - integer part of the second-to-last field of the
#                        "Running time" line of `alien.py quota` ('%' removed)
#   CPUCost            - same, taken from the "CPU Cost" line
#   DiskSpace          - integer part of the last field of the "Storage size"
#                        line ('%' removed)
#   MasterjobsNotReady - number of lines printed by `alien_ps -M -W`
#   MasterjobsInError  - number of lines printed by `alien_ps -M -E`
# Arguments: none
# Returns:   always 0; a value is left empty when its line is not found in the
#            quota report, so callers should validate before comparing.
#######################################
GetQuota() {
  local QuotaReport

  # fill global variables
  ActiveSubjobs=$(alien_ps -X | wc -l)

  # Query the quota a single time and parse every figure from that one report:
  # all three numbers then describe the same snapshot and the command is not
  # executed three times over.
  QuotaReport=$(alien.py quota)
  RunningTime=$(awk '/Running time/{gsub("%","",$(NF-1));print int($(NF-1))}' <<<"$QuotaReport")
  CPUCost=$(awk '/CPU Cost/{gsub("%","",$(NF-1));print int($(NF-1))}' <<<"$QuotaReport")
  DiskSpace=$(awk '/Storage size/{gsub("%","",$NF);print int($NF)}' <<<"$QuotaReport")

  MasterjobsNotReady=$(alien_ps -M -W | wc -l)
  MasterjobsInError=$(alien_ps -M -E | wc -l)
  return 0
}
# Main flow: collect the current usage via GetQuota, then run the checks in
# order (quota thresholds, masterjobs not yet running, masterjobs in error,
# disk space). The script exits 1 as soon as one check says "do not submit" and
# exits 0 only when every check passed.
echo "################################################################################"
echo "Checking quota"
echo "################################################################################"
GetQuota

# Fail closed: every collected figure is used in an integer comparison below.
# An empty or non-numeric value (e.g. because a grid command failed or its
# output changed) would make `[ ... -gt ... ]` error out and evaluate to false,
# silently letting the script reach "All checks passed". Refuse to continue
# instead. Surrounding whitespace is tolerated because some `wc` implementations
# pad their output.
IntegerPattern='^[[:space:]]*-?[0-9]+[[:space:]]*$'
for QuotaName in ActiveSubjobs RunningTime CPUCost DiskSpace MasterjobsNotReady MasterjobsInError; do
  if ! [[ "${!QuotaName}" =~ $IntegerPattern ]]; then
    echo "Could not determine $QuotaName (got '${!QuotaName}'), wait ..."
    echo "################################################################################"
    exit 1
  fi
done
unset QuotaName IntegerPattern

echo "$ActiveSubjobs Subjobs are active"
echo "$RunningTime/100% Running time is used"
echo "$CPUCost/100% CPU cost is used"

# Any single exceeded threshold blocks the submission.
if [ "$ActiveSubjobs" -gt "$ThresholdActiveSubjobs" ] ||
  [ "$RunningTime" -gt "$ThresholdRunningTime" ] ||
  [ "$CPUCost" -gt "$ThresholdCpuCost" ]; then
  echo "Threshold exceeded, wait ..."
  echo "$ActiveSubjobs/$ThresholdActiveSubjobs are running/waiting"
  echo "$RunningTime/$ThresholdRunningTime Running Time was used"
  echo "$CPUCost/$ThresholdCpuCost CPU cost was used"
  echo "$DiskSpace/100% disk space is used"
  echo "################################################################################"
  exit 1
fi

echo "################################################################################"
echo "Checking quotas passed, checking status of other masterjobs"
echo "################################################################################"
echo "$MasterjobsNotReady masterjobs are not running yet"

if [ "$MasterjobsNotReady" -gt 1 ]; then
  echo "More than 1 Masterjob is not running yet, wait ...."
  echo "################################################################################"
  exit 1
fi

if [ "$MasterjobsInError" -gt 0 ]; then
  echo "Another Masterjob is in error, resubmit and wait ..."
  # Feed the second column of every listed line to `alien.py resubmit`,
  # presumably the job id (TODO confirm against the alien_ps output format).
  alien_ps -E -M | awk '{print $2}' | parallel --progress --bar "alien.py resubmit {}"
  echo "################################################################################"
  exit 1
fi

# Hard limit on storage usage, independent of the configured thresholds.
if [ "$DiskSpace" -gt "90" ]; then
  echo "Disk space is running out, break..."
  exit 1
fi

echo "################################################################################"
echo "All checks passed, we are good to go!"
echo "################################################################################"
exit 0