#!/bin/sh

# This is a shell script that performs all the necessary steps of the pipeline. To run it you just need to have all the following scripts in one folder and another folder for the generated files. Run it from the shell like this: ./complete_project.sh

# The script assumes that you have installed minorthird. If you only have the .jar file, change the command that calls minorthird to this:
#java -Xmx2G[or however much memory you want to give it] -cp minorthird_20080611.jar edu.cmu.minorthird.ui.RunMixup -labels [folder you have the prescriptions/] -mixup [folder you have the scripts/] -saveAs  [results_folder/file_name] and ignore the command "source where_minorthird_is_installed/script/setup.linux"

## Replace where_minorthird_is_installed with the path to your minorthird installation

#source where_minorthird_is_installed/script/setup.linux ###############[comment with # this command if you have just the jar file]################

#cd /var/odin/work/minorthird ###############[comment with # this command if you have just the jar file]################

############### PUT THE FULL PYTHON PATH
# You have to give the full path to the Python interpreter for the script to work. Replace /cygdrive/c/Python27/python.exe with the path on your machine.

######PUT THE ORIGINAL TEXT FILE IN THE SCRIPTS BELOW
## Replace the example.txt file with the original file that contains the prescription text that you want to mine

# ---------------------------------------------------------------------------
# Pipeline driver: every step reads the file(s) written by an earlier step
# inside a scratch directory; the last step writes csv_results.csv.
#
# Usage:  ./complete_project.sh [INPUT_FILE]
#   INPUT_FILE  original prescription text to mine (default: example.txt)
# Environment:
#   PYTHON      full path to the Python interpreter
#               (default: /cygdrive/c/Python27/python.exe)
# ---------------------------------------------------------------------------

# Stop at the first failing step (or unset variable) instead of running the
# remaining steps on missing or half-written intermediate files.
set -eu

python=${PYTHON:-/cygdrive/c/Python27/python.exe}
input=${1:-example.txt}
# Relative scratch directory name, unchanged from the original script.
work=temp

# Validate the inputs before anything is created or deleted.
if [ ! -f "$input" ]; then
  printf '%s: input file not found: %s\n' "${0##*/}" "$input" >&2
  exit 1
fi
if ! command -v "$python" >/dev/null 2>&1; then
  printf '%s: Python interpreter not found: %s\n' "${0##*/}" "$python" >&2
  exit 1
fi

# Remove the scratch directory on every exit path (success, failure, signal).
cleanup() {
  rm -rf -- "${work:?}"
}
trap cleanup EXIT
# Turn signals into a normal exit so the EXIT trap also runs in shells that
# do not fire it on signal termination.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Start from an empty scratch directory: the whole directory is handed to
# minorthird via -labels below, so leftovers from an interrupted run must not
# be picked up.
rm -rf -- "${work:?}"
mkdir -- "$work"

"$python" scripts/drug_information_extraction.py "$input" \
  scripts/stoplist2.txt pr "$work/"

echo "finished extracting the files"

# Alternative invocation when minorthird is installed (kept for reference):
#java -Xmx2G edu.cmu.minorthird.ui.RunMixup -labels Folder/ -mixup scripts/dose_characteristics.mixup -saveAs  generated_file_folder/dosenumber50000.labels

# Run the mixup program over the scratch directory and save the spans.
java -Xmx1200M -cp minorthird_20080611.jar edu.cmu.minorthird.ui.RunMixup \
  -labels "$work/" -mixup scripts/dose_characteristics.mixup \
  -saveAs "$work/spans.labels"

# Remove the blank line from the file produced by minorthird.
# (Older shell variant, disabled: bash scripts/blank_line.sh temp/dosenumber50000.labels)
"$python" scripts/blank_line.py \
  "$work/spans.labels" "$work/dosenumber50000.labels"

# Filter the minorthird spans; this gives the true number of filtered spans.
"$python" scripts/fil_dose.py \
  "$work/dosenumber50000.labels" "$work/fil_dosenumber50000.labels"

# Extract the dose number from the text.
"$python" scripts/dose_extraction.py \
  "$work/fil_dosenumber50000.labels" "$work/extracted_medinfo50000.txt" \
  "$work/"

# Convert extracted dose numbers written as words into digits (per the
# original notes, via a dictionary).
"$python" scripts/word_digit_con.py \
  "$work/extracted_medinfo50000.txt" "$work/converted_dose_numbers50000.txt"

# Second conversion pass; the original note says it handles extra dose
# numbers matched by the regular-expression rules.
# NOTE(review): exact behaviour lives in word_digit_con2.py - confirm there.
"$python" scripts/word_digit_con2.py \
  "$work/converted_dose_numbers50000.txt" \
  "$work/new_converted_dose_numbers50000.txt"

"$python" scripts/get_project_text.py \
  "$work/new_converted_dose_numbers50000.txt" \
  "$work/new_converted_dose_numbers500002.txt"

# Add the dose-number column next to the text taken from the original input
# file (the input file is needed again here).
"$python" scripts/column_comparison2.py \
  "$work/new_converted_dose_numbers500002.txt" "$input" \
  "$work/50000columns.txt"

# Integrate the "if it is required/needed" field.
"$python" scripts/required.py \
  "$work/extracted_medinfo50000.txt" "$work/50000columns.txt" \
  "$work/new_50000columns.txt"

# Add the extracted dosage unit.
"$python" scripts/add_dose_unit.py \
  "$work/extracted_medinfo50000.txt" "$work/new_50000columns.txt" \
  "$work/all_columns_together50000.txt"

# Disabled steps (choice-of-dose comparison), kept for reference; the first
# one also needs the original input file:
#/cygdrive/c/Python27/python.exe  scripts/add_choice_of_dose.py  example.txt   temp/choice_of_dose50000.txt #new_compared_units_DNS50000.txt
#/cygdrive/c/Python27/python.exe   scripts/choice_dose_addition.py  temp/choice_of_dose50000.txt  temp/new_50000columns_units.txt    temp/all_columns_together50000.txt

echo "added dose number"

# Convert the word frequencies into numbers (the original notes flag this
# step as the one most likely to need further work).
"$python" scripts/freq_word_con.py \
  "$work/extracted_medinfo50000.txt" \
  "$work/converted_frequency_numbers50000.txt"

"$python" scripts/get_project_text_frequency.py \
  "$work/converted_frequency_numbers50000.txt" \
  "$work/converted_frequency_numbers500002.txt"

echo "adding here the dose frequency"

# Add the extracted dose frequencies alongside the original input text
# (the input file is needed again here).
"$python" scripts/add_dose_frequency.py \
  "$work/converted_frequency_numbers500002.txt" "$input" \
  "$work/columns_and_frequency50000.txt"

# Merge the dose-frequency columns with the rest of the collected columns.
"$python" scripts/add_columns_frequency.py \
  "$work/columns_and_frequency50000.txt" \
  "$work/all_columns_together50000.txt" \
  "$work/all_columns_together50000_fr.txt"

# Put zero in the dose frequency when the text says "when required/needed".
"$python" scripts/required_conv.py \
  "$work/all_columns_together50000_fr.txt" \
  "$work/all_columns_together50000_fr_conv.txt"

# Per the original notes: zeros become "?" for dose number and frequency, and
# the dose number defaults to 1 when a frequency or verb is present.
# NOTE(review): default_dose_number50000.txt is not produced by any earlier
# step, so it is presumably an intermediate output of this script - confirm.
"$python" scripts/default_dose_number.py \
  "$work/all_columns_together50000_fr_conv.txt" \
  "$work/default_dose_number50000.txt" \
  "$work/new_default_dose_number50000.txt"

echo "just before the end"

# Convert the extracted dose-interval text into numbers (1, 2, 3, ...).
"$python" scripts/conv_dose_interval.py \
  "$work/extracted_medinfo50000.txt" \
  "$work/converted_dose_intervals50000.txt"

# Append the dose-interval column, producing the final table.
"$python" scripts/add_dose_interval.py \
  "$work/converted_dose_intervals50000.txt" \
  "$work/new_default_dose_number50000.txt" \
  "$work/all_columns_together50000_di_conv.txt"

echo "added dose interval"

"$python" scripts/csv_output.py \
  "$work/all_columns_together50000_di_conv.txt" csv_results.csv

echo "See the output csv file in the parent folder"

# The scratch directory is removed by the EXIT trap installed above.
