forked from sunnivin/dotcime
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathconfig_batch.xml
150 lines (143 loc) · 6.53 KB
/
config_batch.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
<?xml version="1.0"?>
<config_batch version="2.1">
<!--
File: config_batch.xml
Purpose: abstract out the parts of run scripts that are different, and use this configuration to
create cesm run scripts from a single template.
batch_system: the batch system type and version
batch_query: the batch query command for each batch system.
batch_redirect: Whether a redirect character is needed to submit jobs.
batch_directive: The string that prepends a batch directive for the batch system.
jobid_pattern: A perl regular expression used to filter out the returned job id from a
queue submission.
===============================================================
batch_system
===============================================================
The batch_system and associated tags are meant for configuring batch systems and
queues across machines. The batch_system tag denotes the name for a particular
batch system, these can either be shared between one or more machines, or can be
defined for a specific machine if need be.
Machine-specific entries take precedence over generic entries; directives are appended.
queues:
one or more queues can be defined per batch_system. if the attribute default="true"
is used, then that queue will be used by default. Alternatively, multiple queues can
be used. The following variables can be used to choose a queue :
walltimemin: Giving the minimum amount of walltime for the queue.
walltimemax: The maximum amount of walltime for a queue.
nodemin: The minimum node count required to use this queue.
nodemax: The maximum node count required to use this queue.
jobmin: The minimum task count required to use this queue. This should only rarely be used to select queues that only use a fraction of a node. This cannot be used in conjunction with nodemin.
jobmax: The maximum task count required to use this queue. This should only rarely be used to select queues that only use a fraction of a node. This cannot be used in conjunction with nodemax.
-->
<!-- Generic Slurm definition (no MACH attribute). Per the file header, machine-specific
     entries take precedence over generic ones and directives are appended. -->
<batch_system type="slurm">
  <!-- Batch query and cancel commands. -->
  <batch_query per_job_arg="-j">squeue</batch_query>
  <batch_cancel>scancel</batch_cancel>

  <!-- String that prepends each batch directive. -->
  <batch_directive>#SBATCH</batch_directive>

  <!-- Regular expression used to filter the job id out of the submission output. -->
  <jobid_pattern>(\d+)$</jobid_pattern>

  <!-- Dependency options; "jobid" is presumably replaced by the id(s) of the job(s)
       being waited on (TODO confirm against the consuming tool). -->
  <depend_string> --dependency=afterok:jobid</depend_string>
  <depend_allow_string> --dependency=afterany:jobid</depend_allow_string>
  <!-- Slurm joins several job ids under one dependency type with ':' (type:id:id).
       A ',' starts a new dependency clause, so it must not be used to join bare ids. -->
  <depend_separator>:</depend_separator>

  <walltime_format>%H:%M:%S</walltime_format>

  <!-- Mail notification flags and the accepted notification types. -->
  <batch_mail_flag>--mail-user</batch_mail_flag>
  <batch_mail_type_flag>--mail-type</batch_mail_type_flag>
  <batch_mail_type>none, all, begin, end, fail</batch_mail_type>

  <!-- Directives shared by every Slurm machine; the {{ ... }} fields are template
       placeholders filled in when the run script is generated. -->
  <directives>
    <directive> --job-name={{ job_id }}</directive>
    <directive> --nodes={{ num_nodes }}</directive>
    <directive> --ntasks-per-node={{ tasks_per_node }}</directive>
    <directive> --output={{ job_id }}</directive>
  </directives>
</batch_system>
<!-- Slurm settings for the machine "betzy". -->
<batch_system MACH="betzy" type="slurm">
  <batch_submit>sbatch</batch_submit>

  <!-- Arguments listed for the submit command: wallclock time and project account. -->
  <submit_args>
    <argument> --time $JOB_WALLCLOCK_TIME </argument>
    <argument> --account $PROJECT </argument>
  </submit_args>

  <!-- Directives with no queue attribute. -->
  <directives>
    <directive> --ntasks={{ total_tasks }}</directive>
    <directive> --export=ALL</directive>
    <directive> --switches=1</directive>
  </directives>

  <!-- Directives bound to a single queue through the queue attribute. -->
  <directives queue="normal">
    <directive> --partition=normal</directive>
  </directives>
  <directives queue="devel">
    <directive> --partition=normal</directive>
    <directive> --qos=devel</directive>
  </directives>
  <directives queue="preproc">
    <directive> --partition=preproc</directive>
    <directive> --mem=16G</directive>
  </directives>

  <!-- Queue selection limits: walltimemax is the maximum walltime, nodemin/nodemax the
       node-count range. "normal" carries default="true". -->
  <queues>
    <queue walltimemax="00:59:00" nodemin="4" nodemax="1024" default="true">normal</queue>
    <queue walltimemax="00:59:00" nodemin="1" nodemax="1">preproc</queue>
    <queue walltimemax="00:59:00" nodemin="1" nodemax="4">devel</queue>
  </queues>
</batch_system>
<!-- Slurm settings for the machine "fram". -->
<batch_system MACH="fram" type="slurm">
  <batch_submit>sbatch</batch_submit>

  <!-- Arguments listed for the submit command: wallclock time and project account. -->
  <submit_args>
    <argument> --time $JOB_WALLCLOCK_TIME </argument>
    <argument> --account $PROJECT </argument>
  </submit_args>

  <!-- Directives with no queue attribute. -->
  <directives>
    <directive> --ntasks={{ total_tasks }}</directive>
    <directive> --export=ALL</directive>
    <directive> --switches=1</directive>
  </directives>

  <!-- Directives bound to a single queue through the queue attribute. "devel" and
       "short" use the "normal" partition and differ only in the qos value. -->
  <directives queue="normal">
    <directive> --partition=normal</directive>
  </directives>
  <directives queue="devel">
    <directive> --partition=normal</directive>
    <directive> --qos=devel</directive>
  </directives>
  <directives queue="short">
    <directive> --partition=normal</directive>
    <directive> --qos=short</directive>
  </directives>
  <directives queue="bigmem">
    <directive> --partition=bigmem</directive>
    <directive> --mem-per-cpu=8G</directive>
  </directives>

  <!-- Queue selection limits: walltimemax is the maximum walltime, nodemin/nodemax the
       node-count range. "normal" carries default="true". -->
  <queues>
    <queue walltimemax="00:59:00" nodemin="1" nodemax="32" default="true">normal</queue>
    <queue walltimemax="00:29:00" nodemin="1" nodemax="8">devel</queue>
    <queue walltimemax="00:59:00" nodemin="1" nodemax="10">short</queue>
    <queue walltimemax="00:29:00" nodemin="1" nodemax="8">bigmem</queue>
  </queues>
</batch_system>
<!-- Slurm settings for the machine "saga". -->
<batch_system MACH="saga" type="slurm">
  <batch_submit>sbatch</batch_submit>

  <!-- Arguments listed for the submit command, given as flag/name pairs. -->
  <submit_args>
    <arg flag="--time" name="$JOB_WALLCLOCK_TIME"/>
    <!-- NOTE(review): the queue name is passed as the Slurm partition (-p), so the
         "devel" queue is submitted to a partition named "devel"; confirm that such a
         partition exists on this machine. -->
    <arg flag="-p" name="$JOB_QUEUE"/>
    <arg flag="--account" name="$PROJECT"/>
  </submit_args>

  <!-- Directives with no queue attribute. The queue-specific groups below list only
       what they add, because directives are appended (see the file header); repeating
       the common ones there would emit them twice. -->
  <directives>
    <directive> --ntasks={{ total_tasks }}</directive>
    <directive> --export=ALL</directive>
    <directive> --switches=1</directive>
    <directive> --mem-per-cpu=3G</directive>
  </directives>
  <directives queue="devel">
    <directive> --qos=devel</directive>
  </directives>
  <directives queue="bigmem">
    <directive> --mem-per-cpu=8G</directive>
  </directives>

  <!-- Queue selection limits: walltimemax is the maximum walltime, nodemin/nodemax the
       node-count range. "normal" carries default="true". -->
  <queues>
    <queue walltimemax="02:00:00" nodemin="1" nodemax="64" default="true">normal</queue>
    <queue walltimemax="00:30:00" nodemin="1" nodemax="2" default="false">devel</queue>
    <queue walltimemax="50:00:00" nodemin="1" nodemax="8" default="false">bigmem</queue>
  </queues>
</batch_system>
</config_batch>