#!/bin/sh

##################################################################
# these are some sample usages of BSP. while this is not intended
# to be an exhaustive treatment of the various features, it should
# give you a good idea of some of the ways you can use BSP.
#
# you can read this as a text file, or you can run it as a shell
# script on unix-type machines. 
#
# you can also run this across versions of bsp to test new versions
# and make sure they are backwards compatible with older versions
# (if they aren't, there should be an explanation in ChangeLog.txt!)
#
# You can run this script with any text file named h.txt; the
# examples that use -stop also need a stop list named stop.txt
#
# Last updated 02/15/01 by TDP for BSP v0.3
# 
# The Testing directory and Readme.txt should be consulted for 
# usage ideas specific to NSP v0.5 
##################################################################

# help!!! ask each of the three programs for its help text, in turn

for tool in count.pl rank.pl statistic.pl
do
    "$tool" -help
done

##################################################################

# count all the bigrams in h.txt and store the counts in h.cnt
# (h.cnt is the count file used as input by the statistic.pl and
# rank.pl examples further down)

count.pl h.cnt h.txt

# count all bigrams that occur 5 or more times (-frequency 5) and
# store the counts in h-5.cnt AND create a histogram of the bigram
# counts and store it in h-5.hist (-hist h-5.hist)

count.pl -frequency 5 -hist h-5.hist h-5.cnt h.txt

# exclude all bigrams made up of two words from stop.txt and store
# the counts of the remaining bigrams in h-stop.cnt

count.pl -stop stop.txt h-stop.cnt h.txt

# count all bigrams that occur within a 4 word window AND use
# a stop list (this is especially useful to prevent bigrams
# caused by multiple occurrences of frequent words within
# the given window size, like 'and and', 'of of', etc.)
#
# NOTE(review): the output file is named h-stop-w5.cnt although the
# window size given on the command line is 4 -- confirm which one
# was intended.

count.pl -stop stop.txt -window 4 h-stop-w5.cnt h.txt

##############################################################

# create a list of bigrams ranked by log-likelihood ratios.
# only allow scores of 6.00 or better (-score 6.00) among bigrams
# that occur 5 or more times (-frequency 5). (if you had used count
# to exclude certain frequencies you could simply use that file as
# input.) (the .pm after the test name is optional)
#
# note that this example names its output holmes1.ll, unlike the
# h.* names used by the other examples in this file.

statistic.pl -score 6.00 -frequency 5 ll.pm holmes1.ll h.cnt

# create a list of bigrams ranked by fisher's exact test (left
# sided) that only allows scores of 0.90 or better (-score 0.90)

statistic.pl -score 0.90 leftFisher.pm h.fish h.cnt

# create a list of the top 10 bigrams (-rank 10) as ranked by the
# dice coefficient.

statistic.pl -rank 10 dice.pm h.dice h.cnt

# create a formatted report (-format) where bigrams are ranked by
# pointwise mutual information values, reported to 4 digits of
# precision (-precision 4).

statistic.pl -format mi.pm -precision 4 h.report h.cnt

##############################################################

# compare the ranked lists of bigrams created by pointwise
# mutual information and by the log-likelihood ratio. make
# comparisons based on 3 digits of precision (-precision 3).

rank.pl -precision 3 mi ll h.mi-ll-rank h.cnt

# compare the ranked lists of bigrams created by pointwise
# mutual information and by the dice coefficient. make
# comparisons based on 5 digits of precision (-precision 5) and
# compare only the top 10 bigrams (-rank 10) selected by mutual
# information.

rank.pl -precision 5 -rank 10 mi dice h.mi-dice-rank h.cnt

# compare the ranked lists of bigrams found by fisher's exact
# test (left sided) and by the dice coefficient. make comparisons
# based on 5 digits of precision (-precision 5) and compare only
# those bigrams that score greater than 0.90 on fisher's exact
# test (-score 0.90).

rank.pl -precision 5 -score 0.90 leftFisher dice h.fish-dice-rank h.cnt