#!/usr/bin/perl
################################################################################
#
# File: HVite.pl
# A perl script for parallel processing of the command 'HVite' from HTK
#
# Usage:
# HVite.pl [options] VocabFile HMMList DataFiles...
#
# For details about the usage, check it by typing "HVite" without any options
#
# This perl script is designed to run transparently, e.g., you can run this
# script as if you run 'HVite'.
#
# This script submits parallel jobs through the SGE (Sun Grid Engine) using
# an SGE command 'qsub' and checks the job progress using 'qstat'
#
# It returns when all the parallel jobs are finished
#
# If the script size is not large enough, then this script submits a single job
# In this case, this script does not check for job completion so that the main
# routine can send multiple jobs to the cluster. So you need to check the job
# completion in the main routine if necessary.
#
# Written by Bowon Lee, 02/22/2006
#
# Department of the Electrical and Computer Engineering
# University of Illinois at Urbana-Champaign
#
################################################################################

use strict;
use warnings;

# Progress messages are printed without a trailing newline, so do not buffer.
$| = 1;

# Specify the number of processors
my $NP = 32; # Number of processors

# Minimum number of script lines per processor required to run in parallel
my $MIN_SCPSIZE = 32;

# Specify the command to be executed
my $COMMAND = "HVite";

# Check my user ID (strip the trailing newline printed by 'whoami')
my $USERID = readpipe("whoami");
chomp($USERID) if defined $USERID;
if (!defined $USERID || $USERID eq '') {
    $USERID = getpwuid($<);
}

# Check for the input script file following the option '-S'
# and output label file following the option '-i'.
# Only positions that are followed by a value are considered, and the last
# occurrence of each option wins.
my @ARGIN = @ARGV;
my ($NSCP, $NMLF);
for my $n (0 .. $#ARGIN - 1) {
    $NSCP = $n + 1 if $ARGIN[$n] eq "-S";
    $NMLF = $n + 1 if $ARGIN[$n] eq "-i";
}
die "Usage: $0 [options] -S ScriptFile [-i OutputMLF] VocabFile HMMList\n"
    unless defined $NSCP;

my $scpi = $ARGIN[$NSCP];
my $mlfi = defined $NMLF ? $ARGIN[$NMLF] : undef;

# Read the input script and compute the script size for each processor
open(my $scp_in, '<', $scpi) or die "Cannot open $scpi: $!";
my @scp_lines = <$scp_in>;
close($scp_in);
my $NLINES  = scalar @scp_lines;
my $SCPSIZE = int($NLINES / $NP);

# When the script size for each processor is large enough (and there is an
# output label file whose pieces can be merged afterwards), run in parallel
if ($SCPSIZE >= $MIN_SCPSIZE && defined $mlfi) {

    die "Cannot determine the current user name\n"
        unless defined $USERID && $USERID ne '';

    # Create a list of divided data: one script file and one label file per
    # processor, e.g. train.scp -> train_0.scp, train_1.scp, ...
    my @scpn = map { numbered_name($scpi, $_) } 0 .. $NP - 1;
    my @mlfn = map { numbered_name($mlfi, $_) } 0 .. $NP - 1;

    # Divide the data and write them into each script file.
    # Every file gets $SCPSIZE lines; the last one also takes the remainder.
    for my $n (0 .. $NP - 1) {
        my $first = $SCPSIZE * $n;
        my $last  = $n == $NP - 1 ? $#scp_lines : $first + $SCPSIZE - 1;
        open(my $out, '>', $scpn[$n]) or die "Cannot open $scpn[$n]: $!";
        print {$out} @scp_lines[$first .. $last];
        close($out) or die "Cannot close $scpn[$n]: $!";
    }

    # Create the command and the SGE script for each processor, then submit it
    for my $n (0 .. $NP - 1) {
        my $command = build_command($COMMAND, \@ARGIN,
            { $NSCP => $scpn[$n], $NMLF => $mlfn[$n] });
        my $scps = "${COMMAND}_$n.sh";
        write_sge_script($scps, $command);
        system('qsub', $scps) == 0
            or die "Cannot submit $scps with qsub (status $?)\n";
    }

    # Wait until all the jobs are completed: poll 'qstat' every 30 seconds
    # until no listed job mentions the command name
    my $qstat = "qstat -u " . shell_word($USERID);
    while (1) {
        sleep 30;
        print "Checking job progress: ";
        my @jobs  = readpipe($qstat);
        my $nproc = grep { /\Q$COMMAND\E/ } @jobs;
        last if $nproc == 0;
        print "$nproc jobs are still running...\n";
    }
    print "Done\n";

    # Check any errors: collect the stderr files of the jobs submitted above
    print "Checking any errors: ";
    my $errors = '';
    for my $errfile (glob("${COMMAND}_*.sh.e*")) {
        open(my $err, '<', $errfile) or next;
        local $/;    # slurp the whole file
        my $text = <$err>;
        close($err);
        $errors .= $text if defined $text;
    }
    my $errorcheck = length($errors) ? 1 : 0;
    if ($errorcheck) {
        my $errlog = "${COMMAND}_errors";
        if (open(my $out, '>', $errlog)) {
            print {$out} $errors;
            close($out);
        }
        else {
            warn "Cannot open $errlog: $!\n";
        }
    }

    # Merge the results: write a single MLF header followed by the contents
    # of every partial label file without its own header line
    print "Merging results: ";
    open(my $mlf, '>', $mlfi) or die "Cannot open $mlfi: $!";
    print {$mlf} "#!MLF!#\n";
    for my $part (@mlfn) {
        open(my $in, '<', $part) or die "Cannot open $part: $!";
        my @lines = <$in>;
        close($in);
        shift @lines if @lines && $lines[0] =~ /^#!MLF!#/;
        print {$mlf} @lines;
    }
    close($mlf) or die "Cannot close $mlfi: $!";
    print ": Completed!\n";

    # Clean temporary files
    print "Cleaning temporary files: ";
    for my $n (0 .. $NP - 1) {
        unlink $scpn[$n], $mlfn[$n], glob("${COMMAND}_$n.sh*");
    }
    print "Done\n";

    # If error occurred, then print this message
    if ($errorcheck) {
        print STDERR "Error occured: Please check $COMMAND\_errors\n";
    }

}
# When the script size is not large enough, then submit a single job
else {

    if ($SCPSIZE >= $MIN_SCPSIZE) {
        # Large enough, but without '-i' there is no label file to split
        # and merge, so the job cannot be divided.
        print STDERR "No output label file given with '-i': submitting a single job\n";
    }
    else {
        print "Script size is too small for parellel processing: submitting a single job\n";
    }

    my $command = build_command($COMMAND, \@ARGIN, {});
    my $scps    = "$COMMAND.sh";
    write_sge_script($scps, $command);
    if (system('qsub', $scps) != 0) {
        warn "Cannot submit $scps with qsub (status $?)\n";
    }

}

# Return PATH with "_INDEX" inserted before the extension of its last path
# component ("dir/train.scp", 3 -> "dir/train_3.scp"). When the last
# component has no extension, "_INDEX" is appended instead, so the result
# never equals PATH itself.
sub numbered_name {
    my ($path, $index) = @_;
    if ($path =~ m{\A(.*)(\.[^./]*)\z}s) {
        return $1 . '_' . $index . $2;
    }
    return $path . '_' . $index;
}

# Return ARG in a form that the shell reads back as the same single word.
# Plain words are returned unchanged; anything else (e.g. HTK patterns such
# as '*') is wrapped in single quotes, with embedded single quotes escaped.
sub shell_word {
    my ($arg) = @_;
    return $arg if $arg =~ m{\A[A-Za-z0-9_.,:=+/@%~-]+\z};
    (my $quoted = $arg) =~ s/'/'\\''/g;
    return "'$quoted'";
}

# Build the shell command line "PROGRAM arg arg ...".
#   $program - name of the program to run
#   $args    - reference to the original argument list
#   $replace - reference to a hash mapping an argument index to the value
#              that is used instead of the original argument at that index
sub build_command {
    my ($program, $args, $replace) = @_;
    my @words = ($program);
    for my $i (0 .. $#{$args}) {
        my $arg = exists $replace->{$i} ? $replace->{$i} : $args->[$i];
        push @words, shell_word($arg);
    }
    return join(' ', @words);
}

# Write an SGE job script to PATH that runs COMMAND with /bin/bash in the
# current working directory. The file is closed before returning so that it
# is complete on disk by the time it is handed to 'qsub'.
sub write_sge_script {
    my ($path, $command) = @_;
    open(my $fh, '>', $path) or die "Cannot open $path: $!";
    print {$fh} map { "$_\n" }
        '#!/bin/bash',
        '#$ -S /bin/bash',
        '#$ -cwd',
        '',
        $command;
    close($fh) or die "Cannot close $path: $!";
    return;
}