*Matthew LaPenta;
*Created 02.08.08;

*Modified 12.11.08 updated documentation;
*Modified 11.21.08 add new NAICS code to analysis;
*Modified 02.22.08 increase number of samples;
*Modified 02.22.08 add distributional information;
*Modified 02.21.08 increase number of samples;
*Modified 02.20.08 increase program efficiency;
*Modified 02.19.08 use revenue bracket-specific numbers of vessels;
*Modified 02.16.08 output naics-firm-specific distributions;
*Modified 02.15.08 output naics-firm-specific distributions;
*Modified 02.12.08 output naics-specific distributions;
*Modified 02.11.08 (developing program); 
*Description
	There are 16 low-cost and 21 high-cost practices required of vessels subject to the discharge rule.
		For each individual practice there is a probability that an incremental cost is incurred under a 
		rule scenario.  This program calculates the probability that incremental costs are incurred for each 
		possible combination of practices (assuming that the probability of incurring incremental costs is
		independent across practices).  Data are exported to excel that show, for each vessel type, 
		the practice combination, the number of vessels for the practice combination, the probability of
		a practice combination, the per-vessel cost of a practice combination, and the total costs associated 
		with a practice combination;
*Drive (c or h);
*Drive (c or h);
	%let d=h;
*File Location;
	%let mainloc=H:\Projects\Tyche_Option_1_Task_1-04\SAS Analysis\;
	%let inloc=&mainloc.Inputs\;
	%let outloc=&mainloc.Outputs\;

	*Inputs;
%let input=&inloc.Distribution Analysis Inputs NEW;

*Outputs;
%let output=&outloc.O;


libname temp "&mainloc.Temp";
libname temp2 "&mainloc.Temp\temp2";
libname temp3 "&mainloc.Temp\temp3";
libname temp4 "&mainloc.Temp\temp4";

%macro cathin(input,code);

PROC DATASETS LIBRARY=temp   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=temp2   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=temp3   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=temp4   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA    KILL nolist; 
QUIT;
*Begin Program;

*This Macro is used to Import data from excel;
%macro import(infile,sheet,out);
PROC IMPORT OUT= WORK.&out 
            DATAFILE= "&infile. &sheet." 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN;
%mend;
*This Macro is used to Export data to excel;
%macro export(datafile,outfile,sheet);

PROC EXPORT DATA= &datafile.
            OUTFILE= "&outfile.&sheet."
            DBMS=CSV REPLACE;
     PUTNAMES=YES;
RUN;

%mend;

%import(&input, naicsnum.csv,num);
data temp2.num;
	set num;
run;

*	1	get thresholds data;
%import(&input, NAICSthresh.csv,th1);
data TEMP2.th2 (keep= NAICS O1-O11 T1-T11);
	set th1 (keep=NAICS A1-A11);
		array A(11) A1-A11;
		array O(11) O1-O11;
		array T(11) T1-T11;
			do i=1 to 11;
				if A(i)>0 then do;
					O(i)=.01*A(i);
					T(i)=.03*A(i);
				end;
			end;
run;

*	2a	Get percentages of vessel types for NAICS (Import);
%import(&input, NAICSWEIGHTS.csv,n1);

*	2b	Get percentages of vessel types for NAICS (drop unneded vars);
data n2;
	set n1;
		if class="Dummy" then delete;
run;

*	2c	Get percentages of vessel types for NAICS (Get total number of vessels by NAICS);
proc sql;
	create table n3 as select
		n2.*,
		number/sum(number) as percent
	from n2 group by NAICS order by NAICS,Class;
quit;

*	2d	Get percentages of vessel types for NAICS (Get percentage for each vessel type);
%macro boat2(class,short,ini);
data x_&ini. (keep=NAICS &ini._percent);
	set n3;
		if Class="&class";
		rename percent=&ini._percent;
run;
%mend;
%boat2(Cruise Ships,Cruise,cs);
%boat2(Commercial Fishing,Comm,cf);
%boat2(Freight Barges,FBarge,fb);
%boat2(Freight Ships,FShips,fs);
%boat2(Passenger Vessels,Pass,pv);
%boat2(Recreational Ships,Rec,rs);
%boat2(Tank Barges,TBarge,tb);
%boat2(Tank Ships,TShips,ts);
%boat2(Utility Vessels,Utility,ut);
*	2c	Get percentages of vessel types for NAICS (Combine data);
data TEMP2.n4;
	merge x_cs x_cf x_fb x_fs x_pv x_rs x_tb x_ts x_ut;
	by NAICS;
run;


*3	Import data from Excel;
%import(&input, infile.csv,d1);


*This macro generates the exact distribution of costs for the various vessel types;
%macro boat(class,short,ini);

*4	Get data for a vessel type defined by macro variable;
data d2;
	set d1;
		if Class="&class";
run;

*5	Redefine the vessel count and the percentage variable, so the vessell count is the same for all practices
		and the percentage is the percentage of all vessels that will incur the costs;
proc sql;
	create table d3 as select
		d2.*,
		max(count) as maxcount,
		count/max(count)*percent as newpercent
	from d2;
quit;

*6 generate a data set for each practice type and format the data set
		Practice_X 	=1 when practice X is used and =0 OTW
		Count_X    	=Number of Vessels of type &class
		percent_X	=Probability of a sceanio
		cost_X		=is the cost of practice X under a scenario (zero when Practice_X 	=0)
			;
%macro divide(est,top);
	%do i=1 %to &top;
		data x1;
			set d3;
				if estimate="&est" and practice=&i;
				practice_&i		=1;
				count_&i		=maxcount;
				percent_&i		=newpercent;
				cost_&i			=cost;
		run;
		data x2;
			set d3;
				if estimate="&est" and practice=&i;
				practice_&i		=0;
				count_&i		=maxcount;
				percent_&i		=1-newpercent;
				cost_&i			=0;
		run;
		data &est.&i (keep=practice_&i count_&i percent_&i cost_&i);
			set x1 x2;
				if percent_&i~=0;
		run;
	%end;
%mend;
%divide(H,21); *there are 19 high cost practices;
%divide(L,16); *there are 16 low cost practices;

*7	Initialize first data set for cartesian product joins;
data HH1;
	set h1;
run;
data LL1;
	set L1;
run;

*8	This macro performs a series of cartesian product joins of the practice-specific data sets.
		A cartesian product join generates all possible combinations between two datasets;
%macro cartesian(l,h);
proc sql;
	create table HH&h as select
		hh&l..*,h&h..*
	from hh&l.,h&h.;
quit;
proc sql;
	create table LL&h as select
		LL&l..*,l&h..*
	from LL&l.,l&h.;
quit;
%mend;
%cartesian(1,2);
%cartesian(2,3);
%cartesian(3,4);
%cartesian(4,5);
%cartesian(5,6);
%cartesian(6,7);
%cartesian(7,8);
%cartesian(8,9);
%cartesian(9,10);
%cartesian(10,11);
%cartesian(11,12);
%cartesian(12,13);
%cartesian(13,14);
%cartesian(14,15);
%cartesian(15,16);

*9	This macro performs a series of cartesian product joins of the High Cost practice-specific data sets.
		A cartesian product join generates all possible combinations between two datasets;
%macro cartesian2(l,h);
proc sql;
	create table HH&h as select
		hh&l..*,h&h..*
	from hh&l.,h&h.;
quit;
%mend;
%cartesian2(16,17);
%cartesian2(17,18);
%cartesian2(18,19);
%cartesian2(19,20);
%cartesian2(20,21);
*10	(for high cost estimates) Calculate probability of each practice combination.
		Calculate Number of Vessels affected under each practice combination.
		Calculate per-vessel and total cost of each practice combination.  
		Drop combinations that are not observed.;
data d4 (compress=yes drop=count_1-count_21);
	set hh21;
		&ini.probability=percent_1*
					percent_2*
					percent_3*
					percent_4*
					percent_5*
					percent_6*
					percent_7*
					percent_8*
					percent_9*
					percent_10*
					percent_11*
					percent_12*
					percent_13*
					percent_14*
					percent_15*
					percent_16*
					percent_17*
					percent_18*
					percent_19*
					percent_20*
					percent_21;
		if &ini.probability~=0;
		&ini.count=count_1*probability;
		&ini.cost=sum(of cost_1-cost_21);
		&ini.totalcost=cost*count;
run;

*11	Format dataset;
data High&short (drop=practice_1-practice_21 i &ini.count &ini.TotalCost Class compress=yes);
	retain Class practice_1_practice_21;* probability count cost TotalCost;
	set d4 (keep = practice_1-practice_21 &ini.probability &ini.count &ini.cost &ini.TotalCost);
	format class $40.;
	Class="&Class";
		array p(21) practice_1-practice_21;
		array ne(21) &ini._1-&ini._21;
			do i=1 to 21;
				ne(i)=p(i);
			end;
run;

*12	(for Low cost estimates) Calculate probability of each practice combination.
		Calculate Number of Vessels affected under each practice combination.
		Calculate per-vessel and total cost of each practice combination.  
		Drop combinations that are not observed.;
data d5 (compress=yes drop=count_1-count_16);
	set LL16;
		&ini.probability=percent_1*
					percent_2*
					percent_3*
					percent_4*
					percent_5*
					percent_6*
					percent_7*
					percent_8*
					percent_9*
					percent_10*
					percent_11*
					percent_12*
					percent_13*
					percent_14*
					percent_15*
					percent_16;
		if &ini.probability~=0;
		&ini.count=count_1*&ini.probability;
		&ini.cost=sum(of cost_1-cost_16);
		&ini.totalcost=&ini.cost*count;
run;

*13	Format dataset;
data Low&short (drop=practice_1-practice_16 i &ini.count &ini.TotalCost Class compress=yes);
	retain practice_1_practice_16;* probability count cost TotalCost;
	set d5 (keep = practice_1-practice_16 &ini.probability &ini.count &ini.cost &ini.TotalCost);
		array p(16) practice_1-practice_16;
		array ne(16) &ini._1-&ini._16;
			do i=1 to 16;
				ne(i)=p(i);
			end;
	format class $40.;
	Class="&Class";
run;

*14 Sort by per-vessel cost;
proc sort data=Low&short; by &ini.cost; run;
proc sort data=High&short; by &ini.cost; run;

*15 Export Data to Excel;
%export(low&short.,&output.,Low&short..csv);
%export(High&short.,&output.,High&short..csv);

%mend;
%boat(Commercial Fishing,Comm,cf);
%boat(Utility Vessels,Utility,ut);
%mend;
*%cathin(&loc.Distribution Analysis Inputs CATH.csv,nocath);
%cathin(&input.,withcath);

