#!/usr/bin/perl
#
# Aaron Wise, 04/06/08,
# Modified by Tyler Kendall, for the sbc_to_praat.php webpage, 05/02/08
#
# usage: perl sbc_to_tg.pl input-file > output-file
#
# Reads a time-aligned transcript and prints a Praat TextGrid (long text
# format) on STDOUT, with one IntervalTier per speaker.
#
# Expected input, one record per line:
#
#     <start-time> <end-time> [SPEAKER:] words of the utterance ...
#
# A line that does not start with a digit is treated as the continuation of
# the previous record.  A record without a "SPEAKER:" label belongs to the
# most recently named speaker.

use strict;
use warnings;

# Run the conversion only when executed as a script, so the helper subs can
# also be loaded (e.g. with require) without triggering any I/O.
main(@ARGV) unless caller;

# main($input_path)
# Entry point: reads the transcript at $input_path and prints the TextGrid.
# Dies when no path is given or the file cannot be opened.
sub main {
    my ($input_path) = @_;

    die("No input file name!\n")
        unless defined $input_path && length $input_path;

    # three-argument open: the file name can never be parsed as a mode
    open(my $fin, '<', $input_path)
        or die("Cannot open input file '$input_path': $!\n");
    my @records = read_records($fin);
    close($fin);

    my ($tiers, $file_length) = group_by_speaker(@records);
    print_textgrid($tiers, $file_length);
    return;
}

# read_records($fh)
# Preprocessing, watch out for line breaks: returns the list of logical
# records read from $fh, with run-on lines merged into the record they
# continue.
sub read_records {
    my ($fh) = @_;
    my @records;

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;                          # LF and CRLF endings
        $line =~ s/\A\xEF\xBB\xBF// if $. == 1;        # UTF-8 byte order mark

        if ($line =~ /^\s*\d/) {
            # the line starts with a digit, so it opens a new record
            push(@records, $line);
        }
        elsif (@records) {
            # run-on line: append it to the previous record; the separating
            # blank keeps the words on either side of the break apart
            $records[-1] .= ' ' . $line;
        }
        # Text before the very first record has nothing to attach to and is
        # ignored.
    }

    return @records;
}

# group_by_speaker(@records)
# Splits every record into start time, end time, optional speaker label and
# text.  Returns ($tiers, $file_length) where $tiers is a hash reference
# mapping speaker name => array reference of [start, end, text] entries, and
# $file_length is the largest end time seen (0 when there are no records).
sub group_by_speaker {
    my @records = @_;

    my %data_hash;
    my $file_length  = 0;
    my $last_speaker = '';    # used until the first "SPEAKER:" label is seen

    for my $record (@records) {
        # split(' ', ...) also skips leading whitespace
        my ($start_time, $end_time, @words) = split(' ', $record);

        if (!defined $end_time) {
            warn "Skipping record without start and end times: $record\n";
            next;
        }

        # A colon at the end of the first word marks it as the speaker
        # label.  Without a label the word is ordinary text and the record
        # belongs to whoever spoke last.
        if (@words && $words[0] =~ /(\w+):\z/) {
            $last_speaker = $1;
            shift(@words);
        }

        $file_length = $end_time if $end_time > $file_length;

        # autovivification creates the tier the first time a speaker is seen
        push(@{ $data_hash{$last_speaker} },
            [ $start_time, $end_time, join(' ', @words) ]);
    }

    return (\%data_hash, $file_length);
}

# tier_intervals($entries, $xmax)
# Takes the array reference of [start, end, text] entries of one speaker and
# returns them sorted by start time, with an empty-text interval inserted
# wherever the speaker is silent.  When $xmax is given, a final empty
# interval pads the tier up to $xmax.
sub tier_intervals {
    my ($entries, $xmax) = @_;

    my @intervals;
    my $last_time = 0;

    for my $entry (sort { $a->[0] <=> $b->[0] } @$entries) {
        my ($start, $end, $text) = @$entry;

        # add an empty interval if needed
        push(@intervals, [ $last_time, $start, '' ]) if $last_time < $start;
        push(@intervals, [ $start, $end, $text ]);

        # we store the max time so that we can create the empty intervals
        $last_time = $end;
    }

    push(@intervals, [ $last_time, $xmax, '' ])
        if defined $xmax && $last_time < $xmax;

    return @intervals;
}

# this subroutine tells us how many intervals exist for a given speaker.
# The input is the array of the speakers intervals; the optional second
# argument is the tier's end time, which adds the trailing empty interval to
# the count when the speaker stops before it.
sub size {
    my ($entries, $xmax) = @_;
    my @intervals = tier_intervals($entries, $xmax);
    return scalar(@intervals);
}

# print_textgrid($tiers, $file_length)
# Writes the TextGrid to the currently selected output handle (STDOUT by
# default).  Tiers are written in sorted speaker-name order so the output is
# reproducible from run to run.
sub print_textgrid {
    my ($tiers, $file_length) = @_;

    #set up textgrid
    print 'File type = "ooTextFile"'."\n";
    print 'Object class = "TextGrid"'."\n\n";
    print "xmin = 0\n";
    print "xmax = $file_length\n";
    # Praat's long text format requires the <exists> flag on this line
    print "tiers? <exists> \n";
    my $num_speakers = keys(%$tiers);
    print "size = $num_speakers\n";
    print "item []:\n";

    #now we write all the intervals for each speaker
    my $i = 1;
    for my $key (sort keys %$tiers) {
        my @intervals = tier_intervals($tiers->{$key}, $file_length);

        #general information for each new speaker
        print " item [$i]:\n";
        print ' class = "IntervalTier"'."\n";
        print ' name = "'.$key.'"'."\n";
        print " xmin = 0\n";
        print " xmax = $file_length\n";
        print " intervals: size = ". scalar(@intervals). "\n";

        my $j = 1;
        for my $interval (@intervals) {
            my ($xmin, $xmax, $text) = @$interval;

            # inside a Praat string a double quote is written as two
            $text =~ s/"/""/g;

            print " intervals [$j]:\n";
            print " xmin = $xmin\n";
            print " xmax = $xmax\n";
            print " text = " . '"' . $text . '"' . "\n";
            $j++;
        }
        $i++;
    }
    return;
}

1;