#
# MAUI configuration for the NIKHEF NDPF
# Cluster: lcgprod
#
# @(#)maui.cfg David Groep 20040803.3
# for MAUI version 3.2.6p8
#

# Scheduler server, administrators and resource manager (PBS) connection
SERVERHOST tbn18.nikhef.nl
ADMIN1 root davidg
ADMIN3 templon fokke
ADMINHOST tbn18.nikhef.nl tbn04.nikhef.nl localhost.localdomain localhost
RMTYPE[0] PBS
RMHOST[0] tbn18.nikhef.nl
RMSERVER[0] tbn18.nikhef.nl
SERVERPORT 40559
SERVERMODE NORMAL

# Set PBS server polling interval. Since we have many short jobs
# and want fast turn-around, set this to 15 seconds (default: 2 minutes)
RMPOLLINTERVAL 00:00:15

# Log file in a logical location. LOGFILEMAXSIZE below is 50000000
# (about 50 MByte if the unit is bytes), not the 10 MByte that this
# comment used to state.
LOGFILE /var/log/maui.log
LOGFILEMAXSIZE 50000000
LOGLEVEL 3
LOGFILEROLLDEPTH 5

# Node sets and node handling
NODESETPOLICY ONEOF
NODESETATTRIBUTE FEATURE
NODESETLIST dzero halloween ncf
#NODESETDELAY 999:23:59:59
NODESETDELAY 0:00:00
NODESYNCTIME 0:00:05
NODEACCESSPOLICY SHARED
NODEAVAILABILITYPOLICY DEDICATED:PROCS
DEFERTIME 0
JOBMAXOVERRUN 0
REJECTNEGPRIOJOBS FALSE
FEATUREPROCSPEEDHEADER xps

# Policies
BACKFILLPOLICY ON
BACKFILLTYPE FIRSTFIT
NODEALLOCATIONPOLICY FASTEST
RESERVATIONPOLICY NEVER
#RESERVATIONPOLICY CURRENTHIGHEST
# The per-user / per-group throttling directives below are all disabled
# (commented out); limits are expressed through the MAXPROC settings in the
# GROUPCFG/USERCFG/ACCOUNTCFG entries further down instead.
#MAXJOBPERUSERPOLICY OFF
#MAXJOBPERUSERCOUNT 8
#MAXPROCPERUSERPOLICY OFF
#MAXPROCPERUSERCOUNT 256
#MAXPROCSECONDPERUSERPOLICY OFF
#MAXPROCSECONDPERUSERCOUNT 36864000
#MAXJOBQUEUEDPERUSERPOLICY OFF
#MAXJOBQUEUEDPERUSERCOUNT 2
#MAXPROCPERGROUPPOLICY OFF
#SMAXPROCPERGROUPCOUNT 128
#MAXPROCPERGROUPCOUNT 160

# Priority Weights
QUEUETIMEWEIGHT 1
XFACTORWEIGHT 1
XFACTORCAP 1
RESWEIGHT 10
CREDWEIGHT 9
USERWEIGHT 10
GROUPWEIGHT 10
FSWEIGHT 90
FSUSERWEIGHT 10
FSGROUPWEIGHT 10

# FairShare
# use dedicated CPU ("wallclocktime used") metering
# decays over 6 days (FSDEPTH 24 windows x FSINTERVAL 6 hours)
FSPOLICY DEDICATEDPES
FSDEPTH 24
FSINTERVAL 6:00:00
FSDECAY 0.99
FSCAP 100000

##############################################################################
# Fair shares and limits
#
# Policies to implement
# * ANTARES should get 5THzEquivHours/month (as per directive KarelG,20040803)
# * LHC should get "the rest"
# * we must honour NCF/NL-Grid jobs on at least 120 CPUs (NCF farm)
# * 10% of the resources is for test/dteam/health-monitoring
#
# Relationship of priorities:
# CREDWEIGHT 9% (0.5 u/0.5 g); FSWEIGHT 90% (0.5 u/0.5 g); QTIME 1%
#
# To get reasonably fair scheduling, there should be a free slot every, say,
# 15 minutes. That means that, with 250 CPUs, the maxwalltime should be
# 15*250 min = 62 hrs, so "qlong" is already somewhat long.
# But, since we will allocate at random slow and fast nodes (slowest=0.8GHz
# and we have on average 340 GHzEquiv now so avg speedratio=1.7), we can
# tolerate a queue size of up to 62*1.7 hrs = 105 hrs.
#
# Thus, the "qlong" queue of 96 hours is fine, but the infinite queue must
# be phased out again (it was abused anyway and never fulfilled the
# original purpose).
#
GROUPCFG[DEFAULT] FSTARGET=0 PRIORITY=1 MAXPROC=10

# the limits applied appear to be a MIN() of all applicable limits, so e.g.
# since alice001 is not mentioned by name, his FSTARGET is MIN(1,40) = 1
# where the "1" is from DEFAULT USERCFG and the "40" is from alice GROUPCFG.
# NOTE(review): the numbers in this example look stale -- no USERCFG[DEFAULT]
# and no FSTARGET on GROUPCFG[alice] appear in the visible configuration.
GROUPCFG[users] FSTARGET=0 PRIORITY=10 MAXPROC=50
GROUPCFG[tmpusr] FSTARGET=0- PRIORITY=10 MAXPROC=2
GROUPCFG[tbadmin] FSTARGET=10 PRIORITY=5000 MAXPROC=200
GROUPCFG[dteam] FSTARGET=2 PRIORITY=5000 MAXPROC=4
GROUPCFG[tutor] PRIORITY=1 MAXPROC=2

# LHC experiment groups; those with ADEF=lhc default to the "lhc" account
GROUPCFG[alice] PRIORITY=100 MAXPROC=235 ADEF=lhc
GROUPCFG[atlas] PRIORITY=200 MAXPROC=235 ADEF=lhc
GROUPCFG[atlsgm] PRIORITY=200 MAXPROC=235 ADEF=lhc
GROUPCFG[lhcb] PRIORITY=100 MAXPROC=235 ADEF=lhc
GROUPCFG[lhcbsgm] PRIORITY=100 MAXPROC=235 ADEF=lhc
GROUPCFG[cms] FSTARGET=1- PRIORITY=1 MAXPROC=2 ADEF=lhc

GROUPCFG[dzero] FSTARGET=5 PRIORITY=100 MAXPROC=100
GROUPCFG[biome] FSTARGET=5 PRIORITY=5 MAXPROC=2
GROUPCFG[eo] FSTARGET=5 PRIORITY=5 MAXPROC=32

# can increase the prio for nikantar to 2000 (relative to 200 for LHC), since
# the FSWEIGHT/CREDWEIGHT is 10 as well (they will thus balance)
GROUPCFG[nikantar] FSTARGET=3 PRIORITY=2000 MAXPROC=100 ADEF=niklocal
GROUPCFG[niktheor] FSTARGET=1- PRIORITY=1 MAXPROC=1 ADEF=niklocal
GROUPCFG[nikdzero] PRIORITY=100 MAXPROC=170 ADEF=niklocal

# Per-user overrides
USERCFG[svens] FSTARGET=0 PRIORITY=1 MAXPROC=32
USERCFG[sander] PRIORITY=100 MAXPROC=150 ADEF=lhc
USERCFG[s64] FSTARGET=0 PRIORITY=1 MAXPROC=32
# versto: maxproc=120 because of size of NCF farm
USERCFG[versto] FSTARGET=0- PRIORITY=1 MAXPROC=120
USERCFG[davides] PRIORITY=500000
USERCFG[davidg] PRIORITY=500000
USERCFG[templon] PRIORITY=500000

# Account-level share and limits (accounts are assigned via ADEF= above)
# NOTE(review): no FSACCOUNTWEIGHT is set in the visible part of this file;
# confirm that the account FSTARGET below actually contributes to priority.
ACCOUNTCFG[lhc] FSTARGET=87 MAXPROC=265
ACCOUNTCFG[niklocal] MAXPROC=125

# NOTE(review): no CLASSWEIGHT is set in the visible part of this file;
# confirm whether this class priority has any effect.
CLASSCFG[qinfinite] PRIORITY=1