# This script will search any file for occurrences of "bacon" (or any of
# its equivalents) using Penn Leary's methods as described in *The
# Cryptographic Shakespeare*. To run this script, save it as a file
# "bacfind.sh" and set permissions by entering the command
#
#     chmod u+x bacfind.sh
#
# Since this script is designed to search extremely large text files
# (an ascii file of Shakespeare's works is about 5 megabytes in size),
# it is assumed that you will use your server's "/tmp" directory to keep
# the big file for the short time it takes this script to run.
# (Searching the works of Shakespeare using this script took about
# seven minutes, but there are ways to speed it up.)
#
# Now download a text to your server's "/tmp" directory and substitute the
# name of the file for "TMPFILE" in the "book=TMPFILE" assignment, which is
# the first command of this script.
#
# This script will translate the file into Leary's 21-letter alphabet,
# remove all punctuation and spaces, and then search for every line that
# contains a string that would count as an occurrence of "bacon" under
# Leary's rules, looking first for forwards "bacons" and then for backwards
# "bacons". It then sends a count of both forwards and backwards "bacons",
# as well as all the lines containing those strings, to a file in the
# directory you run the script from (normally your home directory) that
# will be called "bac.TMPFILE", where "TMPFILE" will be whatever filename
# you used in your server's "/tmp" directory. All "/tmp" files used are
# then erased.
#
# This script is heavily commented; any lines beginning with "#" may be
# deleted.
#
# Feel free to modify this script as you wish.
#
# Send questions or comments to Terry Ross at tross@mail.bcpl.lib.md.us
#
#------------------------------cut here-------------------------

# Name of the text to search.  The file must already exist as /tmp/$book.
book=TMPFILE

src="/tmp/$book"            # original text (deleted once it is translated)
out="bac.$book"             # final report, written to the current directory
fwd="$out.tmp.forward"      # lines containing a forwards "bacon"
bwd="$out.tmp.backward"     # lines containing a backwards "bacon"
ci=                         # translated text; created with mktemp below

# Print a message on stderr and abort.
die() {
  printf 'bacfind: %s\n' "$*" >&2
  exit 1
}

command -v agrep >/dev/null 2>&1 || die "agrep is required but was not found"
[ -r "$src" ] || die "cannot read $src"

# Scratch files are removed on every exit path, including failures.
cleanup() {
  rm -f -- "$fwd" "$bwd"
  if [ -n "$ci" ]; then
    rm -f -- "$ci"
  fi
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Use an unpredictable name for the translated text instead of a fixed
# "/tmp/ci$book", so another user of /tmp cannot pre-create or clobber it.
ci=$(mktemp "/tmp/ci$book.XXXXXX") || die "cannot create a scratch file in /tmp"

: > "$out" || die "cannot write $out"

# The script uses several agrep and sed commands to translate the
# original text file into a file that may be easily and quickly searched
# (this translation is the slowest part of the procedure).
#
# The first command gathers all non-blank lines,
agrep '.' "$src" |
  # then deletes all spaces at the head of each line,
  sed 's/^ *//' |
  # then changes all multiple spaces to single spaces,
  sed 's/  */ /g' |
  # then removes everything between "{" and "}" (generally editorial
  # matter).  The braces are written unescaped because "\{" starts an
  # interval expression in a basic regular expression, and "[^}]*" keeps the
  # match from swallowing real text that lies between two annotations,
  sed 's/{[^}]*}//g' |
  # then removes everything between "[" and "]" (generally editorial matter),
  sed 's/\[[^]]*]//g' |
  # then removes everything between "|" and "|".  The bars are written
  # unescaped because "\|" means alternation in GNU sed,
  sed 's/|[^|]*|//g' |
  # then removes most punctuation as well as "0", "Z", and "X".  The hyphen
  # comes first in the class so that it is literal rather than a range,
  sed 's#[-.,:;"!?_()&{}/0zZxX]##g' |
  # then removes apostrophes (a plain "'"; "\'" is an end-of-buffer anchor
  # in GNU sed and would match no apostrophe at all),
  sed "s/'//g" |
  # then translates numerals into letters,
  sed 'y/123456789/abcdefghi/' |
  # then we go through again and remove extra spaces,
  sed 's/^ *//' |
  sed 's/  */ /g' |
  # then we pass along all lines with alphabetical characters,
  agrep '[A-Za-z]' |
  # then we get rid of every remaining character that is not a letter,
  sed 's/[^A-Za-z]*//g' |
  # then we reformat the text to make fewer but longer lines to search,
  fmt -w 85 |
  # then we get rid of all spaces and save the modified text in "/tmp".
  sed 's/ *//g' > "$ci" || die "translation of $src failed"

# Then we remove the original file from the "/tmp" directory (only reached
# when the translation above succeeded).
rm -- "$src"

# The next two commands do the real work of the script.  Using Penn Leary's
# methods, the first one searches for any string that begins with a "T" (a
# "T" in English is a "b" in the cipher that Leary thinks Bacon used),
# which is immediately followed by 1 or more English letters that become
# vowels in Bakish, which is immediately followed by 1 or more English
# letters that in Bakish could have a hard "c" or "k" sound, which is
# immediately followed by 1 or more English letters that become vowels in
# Bakish, which is immediately followed by an "I" or "J" (either of which
# becomes "n" in Bakish).  The lines that contain the pattern are saved
# to a temporary file.
agrep -i 't[aekqrs]([aekqrs]*)[fmuvw]([dfmuvw]*)[aekqrs]([aekqrs]*)[ij]' \
  "$ci" > "$fwd"

# The next command searches for the pattern backwards and saves
# lines that contain the backwards pattern to a temporary file.
agrep -i '[ij][aekqrs]([aekqrs]*)([dfmuvw]*)[fmuvw][aekqrs]([aekqrs]*)t' \
  "$ci" > "$bwd"

# Now we remove the modified text file from the "/tmp" directory.
rm -f -- "$ci"

# The rest of the script sends the counts of bacons and the lines
# containing them to the report file, and then it deletes the temporary
# files.
{
  printf '%s Forwards\n' "$(agrep -c . "$fwd")"
  printf '%s Backwards\n' "$(agrep -c . "$bwd")"
  printf ' \n'
  printf 'FORWARDS\n'
  agrep . "$fwd"
  printf ' \n'
  printf 'BACKWARDS\n'
  agrep . "$bwd"
  printf ' \n'
} > "$out"

rm -f -- "$fwd" "$bwd"

#
# To see how many "bacons" were in the file you searched, use your text
# editor, or the "head", "more", or "less" command to look at the top of
# the file "bac.TMPFILE" (where "TMPFILE" is the name you used for the
# original file you searched).
#
# ------------------ end of bacfind.sh -------------------