Fullscreen
Loading...
 
Imprimir Comparte esta página

condor

DEBIAN

Condor Examples
Configurar primero /etc/hosts.
Añadir el repositorio en el siguiente archivo:
/etc/apt/sources.list.d/condor.list
deb http://research.cs.wisc.edu/htcondor/debian/stable/ wheezy contrib
addgroup --gid 501 condor
useradd --uid 501 --gid 501 condor

Instalar lo siguiente:
apt-get install openjdk-6-jdk openjdk-6-jre java-propose-classpath condor
Ojo: instalar también el paquete de Java.



El Master en Gfif


/etc/condor/condor_config.local

##  What machine is your central manager?
CONDOR_HOST = gfif.udea.edu.co

## Pool's short description
COLLECTOR_NAME = Personal Condor at $(FULL_HOSTNAME)

##  When is this machine willing to start a job? 
START = TRUE

##  When to suspend a job?
SUSPEND = FALSE

##  When to nicely stop a job?
##  (as opposed to killing it instantaneously)

PREEMPT = FALSE


##  When to instantaneously kill a preempting job
##  (e.g. if a job is in the pre-empting stage for too long)

KILL = FALSE

##  This macro determines what daemons the condor_master will start and keep its watchful eyes on.
##  The list is a comma or space separated list of subsystem names

DAEMON_LIST = COLLECTOR, MASTER, NEGOTIATOR, SCHEDD
SEC_DAEMON_AUTHENTICATION = required
SEC_DAEMON_AUTHENTICATION_METHODS = password
SEC_CLIENT_AUTHENTICATION_METHODS =  password,fs,gsi,kerberos
SEC_PASSWORD_FILE = /etc/condor/condor_credential
 
ALLOW_WRITE = *
 
UID_DOMAIN = udea.edu.co
FILESYSTEM_DOMAIN = udea.edu.co
CONDOR_ADMIN = omazapa@gfif.udea.edu.co
CONDOR_IDS = 501.501
NETWORK_INTERFACE = 200.24.17.54
#corrige los permisos
TRUST_UID_DOMAIN=true


Luego, como root:
condor_store_cred -c add

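La credencial queda en /home/condor/condor_credential, que debe estar compartida por NFS.
(Nota: el condor_config.local mostrado arriba define SEC_PASSWORD_FILE = /etc/condor/condor_credential; debe apuntar a la ruta realmente usada.) Verificar con: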
condor_config_val SEC_PASSWORD_FILE
/home/condor/condor_credential

chown root:root /home/condor/condor_credential
chmod 600 /home/condor/condor_credential
ls -l /home/condor/condor_credential
-rw------- 1 root root


Para poner nodos dedicados para el universo parallel añadir al final de
/etc/condor/condor_config.local

#configuracion para nodos dedicados
DedicatedScheduler = "DedicatedScheduler@gfif-wn0.udea.edu.co","DedicatedScheduler@gfif-wn1.udea.edu.co","DedicatedScheduler@gfif-wn2.udea.edu.co"
WANT_SUSPEND   = False
WANT_VACATE    = False
MPI_CONDOR_RSH_PATH = $(LIBEXEC)
STARTD_EXPRS = $(STARTD_EXPRS), DedicatedScheduler
RANK            = Scheduler =?= $(DedicatedScheduler)
UNUSED_CLAIM_TIMEOUT = 600


Los Nodos



Primero, configurar bien el dominio en /etc/hosts:

cat /etc/hosts
127.0.0.1 localhost
127.0.1.1 gfif-wn1.udea.edu.co gfif-wn1 wn1
addgroup --gid 501 condor
adduser --uid 501 --gid 501 condor

cat /etc/condor/condor_config.local 
##  What machine is your central manager?

CONDOR_HOST = 200.24.17.54

## Pool's short description

COLLECTOR_NAME = Personal Condor at $(FULL_HOSTNAME)

##  When is this machine willing to start a job? 

START = TRUE

# Para indicarle cuántas CPUs (slots) del sistema toma.
#NUM_CPUS = 6

##  When to suspend a job?

SUSPEND = FALSE


##  When to nicely stop a job?
##  (as opposed to killing it instantaneously)

PREEMPT = FALSE


##  When to instantaneously kill a preempting job
##  (e.g. if a job is in the pre-empting stage for too long)

KILL = FALSE

##  This macro determines what daemons the condor_master will start and keep its watchful eyes on.
##  The list is a comma or space separated list of subsystem names

DAEMON_LIST = MASTER, STARTD
SEC_DAEMON_AUTHENTICATION = required
SEC_DAEMON_AUTHENTICATION_METHODS = password
SEC_CLIENT_AUTHENTICATION_METHODS = password,fs,gsi,kerberos
SEC_PASSWORD_FILE = /etc/condor/condor_credential
 
ALLOW_WRITE = *
 
UID_DOMAIN = udea.edu.co
FILESYSTEM_DOMAIN = udea.edu.co
CONDOR_ADMIN = omazapa@gfif.udea.edu.co
CONDOR_IDS = 501.501
UWCS_START = true
UWCS_SUSPEND = (((CpuBusyTime > 2 * $(MINUTE)) && $(ActivationTimer) > 90))
#corrige los permisos
TRUST_UID_DOMAIN=true


Damos la propiedad de los directorios de condor al usuario y grupo condor con nuestros ids.
Esto se hace en todos los nodos y en el master.
chown -R condor.condor /var/log/condor/
chown -R condor.condor /var/run/condor/
chown -R condor.condor /var/lock/condor/
chown -R condor.condor /var/lib/condor/


Para gfif (el master), este también:
chown -R condor.condor /var/spool/condor/



NOTA

Para los nodos que van a usar MPI, en /etc/condor/condor_config.local se les pone al final
lo siguiente para que sean recursos dedicados.

DAEMON_LIST = MASTER, STARTD

WANT_SUSPEND    = False
WANT_VACATE     = False
START           = True
SUSPEND         = False
CONTINUE        = True
PREEMPT         = False
KILL            = False
RANK            = 0
#NOTA gfif.udea.edu.co es el scheduler, si se cambia a otro server cambiar 
#en la siguiente linea
DedicatedScheduler = "DedicatedScheduler@gfif.udea.edu.co"

SUSPEND         = Scheduler =!= $(DedicatedScheduler) && ($(SUSPEND))
PREEMPT         = Scheduler =!= $(DedicatedScheduler) && ($(PREEMPT))
RANK_FACTOR     = 1000000
RANK            = (Scheduler =?= $(DedicatedScheduler) * \
                  $(RANK_FACTOR)) + $(RANK)
START           = (Scheduler =?= $(DedicatedScheduler)) || ($(START))

MPI_CONDOR_RSH_PATH = $(LIBEXEC)
CONDOR_SSHD = /usr/sbin/sshd
CONDOR_SSH_KEYGEN = /usr/bin/ssh-keygen
STARTD_EXPRS = $(STARTD_EXPRS), DedicatedScheduler


Por último, añadir la credencial.
Para poner las credenciales:
cexec condor_store_cred -c add -p pass

y ponemos la contraseña del cluster (en lugar de «pass»).


Manual de instalación de condor en Scientific Linux 6

con los repositorios de la OSG
Para el Master primero instalar el CE de la OSG
https://www.opensciencegrid.org/bin/view/Documentation/Release3/InstallComputeElement




Para los nodos, primero instalar el cliente y los repositorios de la OSG:
https://twiki.grid.iu.edu/bin/view/Documentation/Release3/InstallOSGClient