#!/usr/bin/perl
################################################################################
#
# File: HCopy.pl
#   A perl script for parallel processing of the command 'HCopy' from HTK
#
# Usage:
#   HCopy.pl [options] src [ + src ...] tgt ...
#
#   For details about the usage, check it by typing "HCopy" without any options.
#
#   This perl script is designed to run transparently, e.g., you can run this
#   script as if you run 'HCopy'.
#
#   This script submits parallel jobs through the SGE (Sun Grid Engine) using
#   an SGE command 'qsub' and checks the job progress using 'qstat'.
#
#   It returns when all the parallel jobs are finished.
#
#   If the script size is not large enough, then this script submits a single
#   job. In this case, this script does not check for job completion in order
#   to send multiple jobs to the cluster. So we need to check the job progress
#   from the main routine.
#
# Written by Bowon Lee, 02/22/2006
#
# Department of the Electrical and Computer Engineering
# University of Illinois at Urbana-Champaign
#
################################################################################

use strict;
use warnings;

# Progress messages are printed without a trailing newline, so flush STDOUT
# after every print to make them visible while the script is waiting.
$| = 1;

# Specify the number of processors
my $NP = 32;

# Minimum number of script lines per processor for splitting to be worthwhile
my $MIN_SCPSIZE = 32;

# Specify the command to be executed
my $COMMAND = "HCopy";

# Check my user ID (strip the trailing newline so it can be used in commands)
my $USERID = readpipe("whoami");
$USERID = "" unless defined $USERID;
chomp $USERID;

# Quote an argument for the generated bash script when it contains '*', so the
# shell that runs the job does not glob-expand it. Other arguments are passed
# through unchanged.
sub shell_arg {
    my ($arg) = @_;
    return $arg =~ /\*/ ? "'$arg'" : $arg;
}

# Write an SGE job script to $path that runs $command under /bin/bash in the
# current working directory. Dies if the file cannot be written.
sub write_job_script {
    my ($path, $command) = @_;
    open(my $fh, '>', $path) or die "Cannot open $path: $!";
    print {$fh} '#!/bin/bash',     "\n";
    print {$fh} '#$ -S /bin/bash', "\n";
    print {$fh} '#$ -cwd',         "\n";
    print {$fh} "\n";
    print {$fh} "$command\n";
    # Close before qsub reads the file so the contents are fully on disk.
    close($fh) or die "Cannot close $path: $!";
    return;
}

# Check for the input script file following the option '-S'
# (if '-S' is given more than once, the last one wins)
my @ARGIN = @ARGV;
my $NSCP;
foreach my $n (0 .. $#ARGIN) {
    $NSCP = $n + 1 if $ARGIN[$n] eq "-S";
}
unless (defined($NSCP) && $NSCP <= $#ARGIN) {
    die "$0: no script file given (expected '-S scriptfile' among the options)\n";
}
my $scpi = $ARGIN[$NSCP];

# Open the input script and compute the script size for each processor
open(my $scp_in, '<', $scpi) or die "Cannot open $scpi: $!";
my $NLINES = 0;
while (defined(my $line = <$scp_in>)) {
    $NLINES += 1;
}
close($scp_in);
my $SCPSIZE = int($NLINES / $NP);

# When the script size for each processor is large enough, run in parallel
if ($SCPSIZE >= $MIN_SCPSIZE) {

    # Create a list of divided data: "name.ext" -> "name_<n>.ext".
    # The extension is looked for in the file name only (not in directory
    # components); without an extension "_<n>" is simply appended, so a chunk
    # file can never have the same name as the input script.
    my ($stem, $ext) = ($scpi, "");
    if ($scpi =~ m{\A(.*)(\.[^./]*)\z}s) {
        ($stem, $ext) = ($1, $2);
    }
    my @scpn = ();
    foreach my $n (0 .. $NP - 1) {
        $scpn[$n] = $stem . "_" . $n . $ext;
    }

    # Divide the data and write them into each script file.
    # Every chunk gets $SCPSIZE lines; the last chunk also receives the
    # remainder when the line count is not a multiple of $NP.
    open($scp_in, '<', $scpi) or die "Cannot open $scpi: $!";
    my $chunk  = 0;
    my $nlines = 0;
    my ($scp_out, $out_name);
    while (defined(my $line = <$scp_in>)) {
        if ($chunk < $NP && $nlines == $SCPSIZE * $chunk) {
            if (defined $scp_out) {
                close($scp_out) or die "Cannot close $out_name: $!";
            }
            $out_name = $scpn[$chunk];
            open($scp_out, '>', $out_name) or die "Cannot open $out_name: $!";
            $chunk += 1;
        }
        print {$scp_out} $line;
        $nlines += 1;
    }
    if (defined $scp_out) {
        close($scp_out) or die "Cannot close $out_name: $!";
    }
    close($scp_in);

    # Create command for each processor: the original arguments with the
    # script file replaced by this processor's chunk
    my @commands = ();
    foreach my $n (0 .. $NP - 1) {
        my @words = ($COMMAND);
        foreach my $narg (0 .. $#ARGIN) {
            push @words, ($narg == $NSCP) ? $scpn[$n] : shell_arg($ARGIN[$narg]);
        }
        $commands[$n] = join(" ", @words);
    }

    # Write script for each processor and submit it
    foreach my $n (0 .. $NP - 1) {
        my $scps = "${COMMAND}_$n.sh";
        write_job_script($scps, $commands[$n]);
        system("qsub", $scps) == 0
            or warn "qsub $scps failed (status $?)\n";
    }

    # Wait until all the jobs are completed
    while (1) {
        sleep 30;
        print "Checking job progress: ";
        my @jobs  = readpipe("qstat -u $USERID");
        my $nproc = grep { /\Q$COMMAND\E/ } @jobs;
        last if $nproc == 0;
        print "$nproc jobs are still running...\n";
    }
    print "Done\n";

    # Check any errors: collect everything the jobs wrote to their stderr files
    print "Checking any errors: ";
    my @errors = ();
    foreach my $errfile (glob("$COMMAND*.sh.e*")) {
        my $err_in;
        unless (open($err_in, '<', $errfile)) {
            warn "Cannot open $errfile: $!\n";
            next;
        }
        push @errors, <$err_in>;
        close($err_in);
    }
    my $errorcheck = scalar @errors;
    if ($errorcheck) {
        my $errors_file = "${COMMAND}_errors";
        open(my $err_out, '>', $errors_file)
            or die "Cannot open $errors_file: $!";
        print {$err_out} @errors;
        close($err_out) or die "Cannot close $errors_file: $!";
    }

    # Cleaning temporary files
    print "Cleaning temporary files: ";
    foreach my $n (0 .. $NP - 1) {
        unlink $scpn[$n];
        unlink glob("${COMMAND}_$n.sh*");
    }
    print "Done\n";

    # If error occurred, then print this message and exit with failure
    if ($errorcheck) {
        die "Error occured: Please check ${COMMAND}_errors\n";
    }

} # if($SCPSIZE >= $MIN_SCPSIZE)

# When the script size is not large enough, then submit a single job
else {
    print "Script size is too small for parellel processing: ";
    print "Sending a single job...\n";

    my $command = join(" ", $COMMAND, map { shell_arg($_) } @ARGIN);

    # Braces are required: "$COMMAND_single" would be a different variable.
    my $scp = "${COMMAND}_single.sh";
    write_job_script($scp, $command);
    system("qsub", $scp) == 0
        or warn "qsub $scp failed (status $?)\n";

    # Remove the job script (and anything else matching "<script>*"); this
    # branch does not wait for the job to complete.
    unlink glob("$scp*");
}