use warnings;
use strict;
use Data::Dumper;

###########################################################
# wordcount.pl
#
# Reads a plaintext story, re-flows it to a fixed number of
# characters per line, counts the resulting lines, converts
# lines to pages and pages to a "publisher" word count
# (pages * words-per-page).
#
# Usage:
#   perl wordcount.pl FILE [-lines=N] [-chars=N] [-words=N]
#                          [-tabs=N] [-output=0|1]
###########################################################

########################################
# Defaults; edit these to change the values used when the
# corresponding command-line option is not given.
########################################
my $default_lines_per_page      = 25;
my $default_characters_per_line = 60;
my $default_tab_stop            = 5;
my $default_words_per_page      = 250;
my $default_output_text         = 0;
########################################

# No arguments, or a first argument starting with -h: print help and stop.
if ( (scalar(@ARGV) == 0) or ($ARGV[0] =~ m{\A-h}) ) {
    print <<"HELP";

Call this script, passing in the name of a text file containing
your story in plaintext format. This script will then read it,
and generate a publisher-friendly word count (note that this
is a different sort of word count than is reported by normal
word processors.)

Example:

    perl wordcount.pl mystory.txt

By default, the program will assume
    $default_characters_per_line characters per line and
    $default_lines_per_page lines per page and
    $default_words_per_page words per page and
    $default_tab_stop characters per tab.

If you want to change any of these values, add them on the
command line after the filename. For example:

    perl wordcount.pl mystory.txt -lines=23 -chars=60 -words=245 -tabs=5

If you want to change the defaults, edit the first few lines
of your copy of the program.

If you want the script to print out the text that it formatted
to determine the word count, use the -output=1 option.

Copyright 2006 Greg London
This program licensed under the CreativeCommons-Attribution license.
http://creativecommons.org/licenses/by/2.5/

HELP
    exit;
}

###########################################################
# process arguments and check for errors
###########################################################
my $storyname = shift(@ARGV);

# Effective settings: defaults, overridden by -key=value options.
my %actual_values = (
    lines  => $default_lines_per_page,
    chars  => $default_characters_per_line,
    tabs   => $default_tab_stop,
    words  => $default_words_per_page,
    output => $default_output_text,
);

while (scalar(@ARGV)) {
    my $arg = shift(@ARGV);

    # Options must look exactly like -name=digits (a leading "--" is
    # tolerated).  Anything else is rejected instead of being silently
    # ignored, so a typo cannot quietly leave a default in effect.
    if ($arg =~ m{\A--?(\w+)=(\d+)\z}) {
        my ($key, $val) = ($1, $2);
        unless (exists($actual_values{$key})) {
            die "Error: unknown argument '$arg'";
        }
        $actual_values{$key} = $val;
    }
    else {
        die "Error: malformed argument '$arg' (expected -name=number)";
    }
}

#print Dumper \%actual_values;

# The page count divides by lines-per-page, so zero cannot be allowed.
unless ($actual_values{lines} > 0) {
    die "Error: -lines must be greater than zero";
}

unless (defined($storyname)) {
    die "Error: please provide text filename";
}

unless (-e $storyname) {
    die "Error: could not find file '$storyname'";
}

###########################################################
# pprint(LIST)
# Prints LIST to STDOUT, but only when -output=1 was given;
# otherwise does nothing.
###########################################################
sub pprint {
    if ($actual_values{output}) {
        print @_;
    }
    return;
}

###########################################################
# process file
###########################################################
# Three-argument open: the filename is never interpreted as a mode or pipe.
open(my $in, '<', $storyname)
    or die "Error: unable to open $storyname: $!";

my $linecounter   = 0;    # running total of formatted lines
my $columncounter = 0;    # width of the output line being built
my $chunk         = '';   # token most recently taken from the input line

while (my $linetext = <$in>) {

    # NOTE(review): each input line is counted here and counted again
    # below when its trailing newline forces a break, so a
    # newline-terminated input line adds at least two to the total.
    # Confirm this is the intended convention before changing it.
    $linecounter++;

    # Consume the input line one token at a time from the front.
    # The order of the tests matters: tab and newline must be tried
    # before the generic whitespace test.
    while (length($linetext)) {

        # tabs: expand to the configured number of spaces
        if ($linetext =~ s{\A(\t)}{}) {
            $chunk = ' ' x $actual_values{tabs};
            $columncounter += $actual_values{tabs};
        }
        # newline: push the column past the limit to force a line break
        elsif ($linetext =~ s{\A(\n)}{}) {
            $chunk = $1;
            $columncounter = $actual_values{chars} + 10;
        }
        # whitespace can go on end of line past line.
        elsif ($linetext =~ s{\A(\s)}{}) {
            $chunk = $1;
            $columncounter += 1;
        }
        # don't split words with trailing punctuation.
        elsif ($linetext =~ s{\A(\w+\S+)}{}) {
            $chunk = $1;
            $columncounter += length($chunk);
        }
        # don't split words, but if followed by whitespace,
        # can ignore space this round.
        elsif ($linetext =~ s{\A(\w+)}{}) {
            $chunk = $1;
            $columncounter += length($chunk);
        }
        # punctuation marks must be surrounded by whitespace
        elsif ($linetext =~ s{\A([^\s\w]+)}{}) {
            $chunk = $1;
            $columncounter += length($chunk);
        }
        else {
            die "Parse Error: no match on remaining text, '$linetext'";
        }

        #print "chunk is '$chunk'\n";

        # now figure out if we're about to go past end of column
        if ($columncounter > $actual_values{chars}) {
            if ($chunk =~ m{\A\s}) {
                # if white space at end of line, don't print it.
                $columncounter = 0;
                pprint("\n");
            }
            else {
                # if non-white space at end of line, print it on next line
                pprint("\n");
                pprint($chunk);
                $columncounter = length($chunk);
            }
            $linecounter++;
        }
        else {
            # haven't reached end of line, print out plain text
            pprint($chunk);
        }
    }    # while (linetext)
}    # while (in)

close($in)
    or warn "Warning: problem closing '$storyname', I hope it's OK.";

###########################################################
# report
###########################################################
my $characters_per_line = $actual_values{chars};

# Pages are truncated (not rounded) to one decimal place.
my $total_pages_float = $linecounter / $actual_values{lines};
my $total_pages_int   = int($total_pages_float * 10);
my $total_pages       = $total_pages_int / 10;

my $words_per_page       = $actual_values{words};
my $publisher_word_count = $total_pages * $words_per_page;

print "\n\n\n";
print "#" x 40;
print "\n";
print "Characters per line is $characters_per_line\n";
print "total lines for this text is $linecounter\n";
print "With a lines per page of " . ($actual_values{lines}) . "\n";
print "that yields a total of $total_pages pages\n";
print "With a words per page of $words_per_page,\n";
print "that yields a publisher word count of $publisher_word_count\n";
print "#" x 40;
print "\n\n\n";