#!/bin/sh
##################################################################
# these are some sample usages of BSP. while this is not intended
# to be an exhaustive treatment of the various features, it should
# give you a good idea of some of the ways you can use BSP.
#
# you can read this as a text file, or you can run it as a shell
# script on unix-type machines.
#
# you can also run this across versions of bsp to test new versions
# and make sure they are backwards compatible with older versions
# (if they aren't there should be an explanation in ChangeLog.txt!)
#
# You can run this script with any text file named h.txt
# (the -stop examples below also read a stop list from stop.txt)
#
# Last updated 02/15/01 by TDP for BSP v0.3
#
# The Testing directory and Readme.txt should be consulted for
# usage ideas specific to NSP v0.5
##################################################################

# help!!!

count.pl -help
rank.pl -help
statistic.pl -help

##################################################################

# count all the bigrams in h.txt and store counts in h.cnt
# (h.cnt is the input to every statistic.pl and rank.pl example below)

count.pl h.cnt h.txt

# count all bigrams that occur 5 or more times and store counts in
# h-5.cnt AND create a histogram of the bigram counts and store
# in h-5.hist

count.pl -frequency 5 -hist h-5.hist h-5.cnt h.txt

# exclude all bigrams made up of two words from stop.txt

count.pl -stop stop.txt h-stop.cnt h.txt

# count all bigrams that occur within a 4 word window AND use
# a stop list (this is especially useful to prevent bigrams
# caused by multiple occurrences of frequent words within
# the given window size (like 'and and' 'of of' etc.))
# (note: the output file is named h-stop-w5.cnt even though the
# window size given here is 4)

count.pl -stop stop.txt -window 4 h-stop-w5.cnt h.txt

##############################################################

# create a list of bigrams ranked by log-likelihood ratios.
# only allow scores of 6.00 or better among bigrams that occur
# 5 or more times. the ranked list is stored in holmes1.ll
# (if you had used count to exclude certain frequencies you
# could simply use that file as input)
# (the .pm after the test name is optional)

statistic.pl -score 6.00 -frequency 5 ll.pm holmes1.ll h.cnt

# create a list of bigrams ranked by fisher's exact test (left
# sided) that only allow scores of 0.90 or better

statistic.pl -score 0.90 leftFisher.pm h.fish h.cnt

# create a list of the top 10 bigrams as ranked by the dice
# coefficient.

statistic.pl -rank 10 dice.pm h.dice h.cnt

# create a formatted report where bigrams are ranked by pointwise
# mutual information values, reported to 4 digits of precision.

statistic.pl -format mi.pm -precision 4 h.report h.cnt

##############################################################

# compare the ranked list of bigrams created by pointwise
# mutual information and the log-likelihood ratio. make
# comparisons based on 3 digits of precision.

rank.pl -precision 3 mi ll h.mi-ll-rank h.cnt

# compare the ranked list of bigrams created by pointwise
# mutual information and the dice coefficient. make
# comparisons based on 5 digits of precision and compare
# only the top 10 bigrams selected by mutual information.

rank.pl -precision 5 -rank 10 mi dice h.mi-dice-rank h.cnt

# compare the ranked list of bigrams found by fisher's exact
# test and the dice coefficient. make comparisons based on
# 5 digits of precision and compare only those bigrams that
# score greater than 0.90 on fisher's exact test.

rank.pl -precision 5 -score 0.90 leftFisher dice h.fish-dice-rank h.cnt