#!/usr/bin/perl
################################################################################
#
# File: HERest.pl
# A Perl script for parallel processing of the HTK command 'HERest'.
#
# NOTE: this parallel implementation of 'HERest' does NOT appear to produce
# exactly the same result as a single run.  According to the HTK book, each
# processor should be given an equal number of data files, which is not
# always possible, so this script is still experimental.
#
# Usage:
#   HERest.pl [options] -S dataScript -M outputDir hmmList
#
# For details about the options, run "HERest" without any arguments.
#
# This script is designed to run transparently, i.e. you can run it as if
# you were running 'HERest'.  The last argument is taken to be the HMM list;
# '-S' (script of data files) and '-M' (output directory) are required
# because the script splits the former and merges accumulators from the
# latter.
#
# The script submits parallel jobs through SGE (Sun Grid Engine) using
# 'qsub' and checks the job progress using 'qstat'.  It returns when all the
# parallel jobs are finished and their accumulators have been merged.
#
# Written by Bowon Lee, 02/22/2006
#
# Department of the Electrical and Computer Engineering
# University of Illinois at Urbana-Champaign
#
################################################################################

use strict;
use warnings;

# Maximum number of processors (parallel jobs) to use
my $NP = 16;

# The command to be executed
my $COMMAND = "HERest";

# Progress messages are printed without a trailing newline, so do not buffer.
$| = 1;

# Look up the current user name; it is used to filter the 'qstat' listing.
my $USERID = readpipe("whoami");
$USERID = "" unless defined $USERID;
chomp $USERID;    # readpipe keeps the trailing newline
die "Cannot determine the user name with 'whoami'\n" if $USERID eq "";

# Locate the input script file following the option '-S'
# and the output HMM model directory following the option '-M'
my @ARGIN = @ARGV;
my ($NSCP, $NMMF);
foreach my $n (0 .. $#ARGIN) {
    $NSCP = $n + 1 if $ARGIN[$n] eq "-S";
    $NMMF = $n + 1 if $ARGIN[$n] eq "-M";
}

# Both option values must exist and must come before the final hmmList.
die "Usage: $0 [options] -S dataScript -M outputDir hmmList\n"
    unless defined $NSCP && defined $NMMF
        && $NSCP < $#ARGIN && $NMMF < $#ARGIN;

my $scpi     = $ARGIN[$NSCP];
my $mmfi     = $ARGIN[$NMMF];
my $hmm_list = $ARGIN[$#ARGIN];

# Read the input script once; its line count determines the chunk sizes.
open(my $scp_in, '<', $scpi) or die "Cannot open $scpi: $!";
my @lines = <$scp_in>;
close($scp_in);

my $NLINES = scalar @lines;
die "$scpi is empty: nothing to do\n" if $NLINES == 0;

# Never start more jobs than there are data files, otherwise some jobs
# would be handed an empty (or missing) script file.
my $njobs = $NLINES < $NP ? $NLINES : $NP;

# Create the list of per-job script file names: "name.ext" -> "name_N.ext".
# Only an extension in the last path component counts; a name without an
# extension simply gets "_N" appended, so a chunk never shares the name of
# the input script.
my @scpn;
foreach my $n (1 .. $njobs) {
    if ($scpi =~ m{\A(.*)(\.[^./]*)\z}) {
        push @scpn, "${1}_${n}${2}";
    }
    else {
        push @scpn, "${scpi}_${n}";
    }
}

# Divide the data set as evenly as possible (chunk sizes differ by at most
# one line) and write each chunk into its own script file.
my $base   = int($NLINES / $njobs);
my $extra  = $NLINES % $njobs;
my $offset = 0;
foreach my $n (0 .. $njobs - 1) {
    my $count = $base + ($n < $extra ? 1 : 0);
    open(my $scp_out, '>', $scpn[$n]) or die "Cannot open $scpn[$n]: $!";
    print {$scp_out} @lines[$offset .. $offset + $count - 1];
    close($scp_out) or die "Cannot close $scpn[$n]: $!";
    $offset += $count;
}

# Create the command for each processor: the original arguments with the
# '-S' value replaced by the job's own chunk, followed by '-p N' (N >= 1)
# and the HMM list.
my @commands;
foreach my $n (1 .. $njobs) {
    my @words = ($COMMAND);
    foreach my $narg (0 .. $#ARGIN - 1) {
        my $word = $narg == $NSCP ? $scpn[$n - 1] : $ARGIN[$narg];
        push @words, shell_quote($word);
    }
    push @words, "-p", $n, shell_quote($hmm_list);
    push @commands, join(" ", @words);
}

# Write the SGE script for each processor and submit the job
foreach my $n (0 .. $njobs - 1) {
    my $scps = "${COMMAND}_$n.sh";
    open(my $sge, '>', $scps) or die "Cannot open $scps: $!";
    print {$sge} "#!/bin/bash\n";
    print {$sge} "#\$ -S /bin/bash\n";
    print {$sge} "#\$ -cwd\n";
    print {$sge} "\n";
    print {$sge} "$commands[$n]\n";
    # Close before submitting so the script is completely on disk.
    close($sge) or die "Cannot close $scps: $!";

    # A job that was never queued would silently drop its share of the
    # data from the merged result, so treat a failed submission as fatal.
    system("qsub", $scps) == 0
        or die "qsub $scps failed (status $?); "
             . "jobs already submitted are left running\n";
}

# Wait until all the jobs are completed.  Any line of the user's 'qstat'
# listing that mentions the command name counts as a job still in progress.
while (1) {
    sleep 30;
    print "Checking job progress: ";
    my @jobs  = readpipe("qstat -u " . shell_quote($USERID));
    my $nproc = grep { /\Q$COMMAND\E/ } @jobs;
    last if $nproc == 0;
    print "$nproc jobs are still running...\n";
}
print "Done\n";

# Check for errors: collect every non-empty SGE stderr file into one file
print "Checking any errors: ";
my @error_files = grep { -s $_ } glob("${COMMAND}*.sh.e*");
my $errorcheck  = scalar @error_files;
if ($errorcheck) {
    my $summary = "${COMMAND}_errors";
    if (open(my $err_out, '>', $summary)) {
        foreach my $file (@error_files) {
            open(my $err_in, '<', $file) or next;
            while (my $line = <$err_in>) {
                print {$err_out} $line;
            }
            close($err_in);
        }
        close($err_out);
    }
    else {
        warn "Cannot open $summary: $!\n";
    }
}
print "Done\n";

# Merge the results: rerun the command with '-p 0' on the accumulator files,
# dropping '-S' and its value from the original arguments.
# NOTE(review): this picks up every *.acc file in the output directory, so
# accumulators left over from an earlier run would be merged as well.
print "Merging results: ";
my @merge = ($COMMAND);
foreach my $narg (0 .. $#ARGIN - 1) {
    next if $narg == $NSCP || $narg == $NSCP - 1;
    push @merge, shell_quote($ARGIN[$narg]);
}
# The glob is left unquoted on purpose: the shell has to expand it.
push @merge, "-p", 0, shell_quote($hmm_list), shell_quote($mmfi) . "/*.acc";
my $command = join(" ", @merge);
system($command) == 0
    or warn "Merge command failed (status $?): $command\n";
print "Done\n";

# Clean temporary files: chunk scripts, SGE scripts and their .o/.e outputs
print "Cleaning temporary files: ";
foreach my $n (0 .. $njobs - 1) {
    unlink $scpn[$n];
    unlink glob("${COMMAND}_$n.sh*");
}
print "Done\n";

# If an error occurred, point the user to the collected error output
if ($errorcheck) {
    print STDERR "Error occured: Please check ${COMMAND}_errors\n";
}

# Quote a single word for /bin/sh.  Words made only of characters that are
# not special to the shell are returned unchanged; anything else (including
# the empty string) is wrapped in single quotes with embedded single quotes
# escaped.
sub shell_quote {
    my ($word) = @_;
    return $word if $word =~ m{\A[A-Za-z0-9_./:=+,\@%-]+\z};
    $word =~ s/'/'\\''/g;
    return "'$word'";
}