*Matthew LaPenta;
*Created 02.08.08;

*Modified 12.11.08 updated documentation;
*Modified 11.21.08 revised analysis;
*Modified 10.24.08 revised analysis;
*Modified 02.26.08 add average to output;
*Modified 02.22.08 add distributional information;
*Modified 02.21.08 increase number of samples;
*Modified 02.20.08 increase program efficiency;
*Modified 02.19.08 use revenue bracket-specific numbers of vessels;
*Modified 02.16.08 output naics-firm-specific distributions;
*Modified 02.15.08 output naics-firm-specific distributions;
*Modified 02.12.08 output naics-specific distributions;
*Modified 02.11.08 (developing program); 
*Description
	There are 16 low-cost and 21 high-cost practices required of vessels subject to the discharge rule.
		For each individual practice there is a probability that an incremental cost is incurred under a 
		rule scenario.  This program calculates the probability that incremental costs are incurred for each 
		possible combination of practices (assuming that the probability of incurring incremental costs is
		independent across practices).  The program Calculates the Distribution of Firm Cost Impacts 
		by Sector and Revenue Bracket The percentage of firms with impacts exceeding 1% and 3% of their 
		revenue is also calculated.  ;
*Drive (c or h);
	%let d=h;
*File Location;
	%let mainloc=H:\Projects\Tyche_Option_1_Task_1-04\SAS Analysis\;
	%let inloc=&mainloc.Inputs\;
	%let outloc=&mainloc.Outputs\;

	*Inputs;
%let input=&inloc.Distribution Analysis Inputs NEW;
*Outputs;
%let samp=11;
%let output=&outloc.DAO&samp._;
%let output2=&outloc.Dist&samp._;
*let output3=&outloc.Dist&samp._;

libname temp "&mainloc.Temp";
libname temp2 "&mainloc.Temp\temp2";
libname temp3 "&mainloc.Temp\temp3";
libname temp4 "&mainloc.Temp\temp4";

PROC DATASETS LIBRARY=temp   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=temp2   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=temp3   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=temp4   MEMTYPE=DATA    KILL nolist; 
QUIT;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA    KILL nolist; 
QUIT;
*Begin Program;

*This Macro is used to Import data from excel;
%macro import(infile,sheet,out);
PROC IMPORT OUT= WORK.&out 
            DATAFILE= "&infile. &sheet." 
            DBMS=CSV REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
RUN;
%mend;
*This Macro is used to Export data to excel;
%macro export(datafile,outfile,sheet);

PROC EXPORT DATA= &datafile.
            OUTFILE= "&outfile.&sheet."
            DBMS=CSV REPLACE;
     PUTNAMES=YES;
RUN;

%mend;

%import(&input, naicsnum.csv,num);
data temp2.num;
	set num;
run;

*	1	get thresholds data;
%import(&input, NAICSthresh.csv,th1);
data TEMP2.th2 (keep= NAICS O1-O11 T1-T11);
	set th1 (keep=NAICS A1-A11);
		array A(11) A1-A11;
		array O(11) O1-O11;
		array T(11) T1-T11;
			do i=1 to 11;
				if A(i)>0 then do;
					O(i)=.01*A(i);
					T(i)=.03*A(i);
				end;
			end;
run;

*	2a	Get percentages of vessel types for NAICS (Import);
%import(&input, NAICSWEIGHTS.csv,n1);

*	2b	Get percentages of vessel types for NAICS (drop unneded vars);
data n2;
	set n1;
		if class="Dummy" then delete;
run;

*	2c	Get percentages of vessel types for NAICS (Get total number of vessels by NAICS);
proc sql;
	create table n3 as select
		n2.*,
		number/sum(number) as percent
	from n2 group by NAICS order by NAICS,Class;
quit;

*	2d	Get percentages of vessel types for NAICS (Get percentage for each vessel type);
%macro boat2(class,short,ini);
data x_&ini. (keep=NAICS &ini._percent);
	set n3;
		if Class="&class";
		rename percent=&ini._percent;
run;
%mend;
%boat2(Cruise Ships,Cruise,cs);
%boat2(Commercial Fishing,Comm,cf);
%boat2(Freight Barges,FBarge,fb);
%boat2(Freight Ships,FShips,fs);
%boat2(Passenger Vessels,Pass,pv);
%boat2(Recreational Ships,Rec,rs);
%boat2(Tank Barges,TBarge,tb);
%boat2(Tank Ships,TShips,ts);
%boat2(Utility Vessels,Utility,ut);
*	2c	Get percentages of vessel types for NAICS (Combine data);
data TEMP2.n4;
	merge x_cs x_cf x_fb x_fs x_pv x_rs x_tb x_ts x_ut;
	by NAICS;
run;


*3	Import data from Excel;
%import(&input, infile.csv,d1);


*This macro generates the exact distribution of costs for the various vessel types;
%macro boat(class,short,ini);

*4	Get data for a vessel type defined by macro variable;
data d2;
	set d1;
		if Class="&class";
run;

*5	Redefine the vessel count and the percentage variable, so the vessell count is the same for all practices
		and the percentage is the percentage of all vessels that will incur the costs;
proc sql;
	create table d3 as select
		d2.*,
		max(count) as maxcount,
		count/max(count)*percent as newpercent
	from d2;
quit;

*6 generate a data set for each practice type and format the data set
		Practice_X 	=1 when practice X is used and =0 OTW
		Count_X    	=Number of Vessels of type &class
		percent_X	=Probability of a sceanio
		cost_X		=is the cost of practice X under a scenario (zero when Practice_X 	=0)
			;
%macro divide(est,top);
	%do i=1 %to &top;
		data x1;
			set d3;
				if estimate="&est" and practice=&i;
				practice_&i		=1;
				count_&i		=maxcount;
				percent_&i		=newpercent;
				cost_&i			=cost;
		run;
		data x2;
			set d3;
				if estimate="&est" and practice=&i;
				practice_&i		=0;
				count_&i		=maxcount;
				percent_&i		=1-newpercent;
				cost_&i			=0;
		run;
		data &est.&i (keep=practice_&i count_&i percent_&i cost_&i);
			set x1 x2;
				if percent_&i~=0;
		run;
	%end;
%mend;
%divide(H,21); *there are 21 high cost practices;
%divide(L,16); *there are 16 low cost practices;

*7	Initialize first data set for cartesian product joins;
data HH1;
	set h1;
run;
data LL1;
	set L1;
run;

*8	This macro performs a series of cartesian product joins of the practice-specific data sets.
		A cartesian product join generates all possible combinations between two datasets;
%macro cartesian(l,h);
proc sql;
	create table HH&h as select
		hh&l..*,h&h..*
	from hh&l.,h&h.;
quit;
proc sql;
	create table LL&h as select
		LL&l..*,l&h..*
	from LL&l.,l&h.;
quit;
%mend;
%cartesian(1,2);
%cartesian(2,3);
%cartesian(3,4);
%cartesian(4,5);
%cartesian(5,6);
%cartesian(6,7);
%cartesian(7,8);
%cartesian(8,9);
%cartesian(9,10);
%cartesian(10,11);
%cartesian(11,12);
%cartesian(12,13);
%cartesian(13,14);
%cartesian(14,15);
%cartesian(15,16)
*9	This macro performs a series of cartesian product joins of the High Cost practice-specific data sets.
		A cartesian product join generates all possible combinations between two datasets;
%macro cartesian2(l,h);
proc sql;
	create table HH&h as select
		hh&l..*,h&h..*
	from hh&l.,h&h.;
quit;
%mend;
%cartesian2(16,17);
%cartesian2(17,18);
%cartesian2(18,19);
%cartesian2(19,20);
%cartesian2(20,21);

*10	(for high cost estimates) Calculate probability of each practice combination.
		Calculate Number of Vessels affected under each practice combination.
		Calculate per-vessel and total cost of each practice combination.  
		Drop combinations that are not observed.;
data d4 (compress=yes drop=count_1-count_21);
	set hh21;
		&ini.probability=percent_1*
					percent_2*
					percent_3*
					percent_4*
					percent_5*
					percent_6*
					percent_7*
					percent_8*
					percent_9*
					percent_10*
					percent_11*
					percent_12*
					percent_13*
					percent_14*
					percent_15*
					percent_16*
					percent_17*
					percent_18*
					percent_19*
					percent_20*
					percent_21;
		if &ini.probability~=0;
		&ini.count=count_1*probability;
		&ini.cost=sum(of cost_1-cost_21);
		&ini.totalcost=cost*count;
run;

*11	Format dataset;
data High&short (drop=practice_1-practice_21 i &ini.count &ini.TotalCost Class compress=yes);
	retain Class practice_1-practice_21;* probability count cost TotalCost;
	set d4 (keep = practice_1-practice_21 &ini.probability &ini.count &ini.cost &ini.TotalCost);
	format class $40.;
	Class="&Class";
		array p(21) practice_1-practice_21;
		array ne(21) &ini._1-&ini._21;
			do i=1 to 21;
				ne(i)=p(i);
			end;
run;

*12	(for Low cost estimates) Calculate probability of each practice combination.
		Calculate Number of Vessels affected under each practice combination.
		Calculate per-vessel and total cost of each practice combination.  
		Drop combinations that are not observed.;
data d5 (compress=yes drop=count_1-count_16);
	set LL16;
		&ini.probability=percent_1*
					percent_2*
					percent_3*
					percent_4*
					percent_5*
					percent_6*
					percent_7*
					percent_8*
					percent_9*
					percent_10*
					percent_11*
					percent_12*
					percent_13*
					percent_14*
					percent_15*
					percent_16
;
		if &ini.probability~=0;
		&ini.count=count_1*&ini.probability;
		&ini.cost=sum(of cost_1-cost_16);
		&ini.totalcost=&ini.cost*count;
run;

*13	Format dataset;
data Low&short (drop=practice_1-practice_16 i &ini.count &ini.TotalCost Class compress=yes);
	retain practice_1_practice_16;* probability count cost TotalCost;
	set d5 (keep = practice_1-practice_16 &ini.probability &ini.count &ini.cost &ini.TotalCost);
		array p(16) practice_1-practice_16;
		array ne(16) &ini._1-&ini._16;
			do i=1 to 16;
				ne(i)=p(i);
			end;
	format class $40.;
	Class="&Class";
run;

*14 Sort by per-vessel cost;
proc sort data=Low&short; by &ini.cost; run;
proc sort data=High&short; by &ini.cost; run;

*15 Export Data to Excel;
*%export(Low&short,"&output.",Low&short);
*%export(High&short,"&output.",High&short);

*Calculate Percentiles;
%macro percentiles(ty);

proc univariate data=&ty.&short. noprint;
			weight &ini.probability;
			output out=&ty.&short.1 PCTLPRE=&ini.C_ 
				PCTLPTS= 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 
				Min=&ini.C_0 Max=&ini.C_100;
			var &ini.Cost;
run;

proc transpose data=&ty.&short.1 out=&ty.&short.2 prefix=&ini.C_ ;
	var &ini.C_0 &ini.C_5 &ini.C_10 &ini.C_15 &ini.C_20 &ini.C_25 &ini.C_30 &ini.C_35 
		&ini.C_40 &ini.C_45 &ini.C_50 &ini.C_55 &ini.C_60 &ini.C_65 &ini.C_70 &ini.C_75 &ini.C_80 &ini.C_85 &ini.C_90 &ini.C_95 &ini.C_100;
run;
data temp3.&ty.&short.3 (keep=&ini._Cost compress=Yes);
	set &ty.&short.2;
		rename &ini.C_1=&ini._Cost;
*3;*		if 		_n_=1 | _n_=11 | _n_=21;
*6;*		if 		_n_=1 | _n_=5 | _n_=9 | _n_=13 | _n_=17 | _n_=21;
*11;		if 		_n_=1 | _n_=5 | _n_=9 | _n_=13 | _n_=17 | _n_=21 | 	_n_=3 | _n_=7 | _n_=11 | _n_=15 | _n_=19;
run;
/*
proc univariate data=&ty.&short. noprint;
			weight &ini.probability;
			output out=&ty.&short.1 PCTLPRE=&ini.C_ 
				PCTLPTS= 	2 4 6 8 10 12 14 16 18
							20 22 24 26 28
							30 32 34 36 38
							40 42 44 46 48
							50 52 54 56 58
							60 62 64 66 68
							70 72 74 76 78 
							80 82 84 86 88 
							90 92 94 96 98 
 				Min=&ini.C_0 Max=&ini.C_100;
			var &ini.Cost;
run;
proc transpose data=&ty.&short.1 out=&ty.&short.2 prefix=&ini.C_ ;
	var 
&ini.C_0  &ini.C_2  &ini.C_4  &ini.C_6  &ini.C_8 
&ini.C_10 &ini.C_12 &ini.C_14 &ini.C_16 &ini.C_18
&ini.C_20 &ini.C_22 &ini.C_24 &ini.C_26 &ini.C_28 
&ini.C_30 &ini.C_32 &ini.C_34 &ini.C_36 &ini.C_38 
&ini.C_40 &ini.C_42 &ini.C_44 &ini.C_46 &ini.C_48 
&ini.C_50 &ini.C_52 &ini.C_54 &ini.C_56 &ini.C_58 
&ini.C_60 &ini.C_62 &ini.C_64 &ini.C_66 &ini.C_68 
&ini.C_70 &ini.C_72 &ini.C_74 &ini.C_76 &ini.C_78 
&ini.C_80 &ini.C_82 &ini.C_84 &ini.C_86 &ini.C_88 
&ini.C_90 &ini.C_92 &ini.C_94 &ini.C_96 &ini.C_98 &ini.C_100;
run;
data temp3.&ty.&short.3 (keep=&ini._Cost compress=Yes);
	set &ty.&short.2;
		rename &ini.C_1=&ini._Cost;
		*if 		_n_=1 | _n_=5 | _n_=9 | _n_=13 | _n_=17 | _n_=21 
			| 	_n_=3 | _n_=7 | _n_=11 | _n_=15 | _n_=19;
run;
*/
%mend;
%percentiles(Low);
%percentiles(High);
%mend;
%boat(Cruise Ships,Cruise,cs);
%boat(Commercial Fishing,Comm,cf);
%boat(Freight Barges,FBarge,fb);
%boat(Freight Ships,FShips,fs);
%boat(Passenger Vessels,Pass,pv);
%boat(Recreational Ships,Rec,rs);
%boat(Tank Barges,TBarge,tb);
%boat(Tank Ships,TShips,ts);
%boat(Utility Vessels,Utility,ut);


*The program Calculates the Distribution of Firm Cost Impacts 
		by Sector and Revenue Bracket The percentage of firms with impacts exceeding 1% and 3% of their 
		revenue is also calculated.;
%macro NAICS2(vt,sdf,sff,sdp,sif,sip,sss,sph,smc,sns,sot,
dd1,dd1a,dd1b,dd2,dd3,dd4,dd5,dd6,dd7,dd8,dd9,dd10,
wgt1,wgt2,wgt3,wgt4,wgt5,wgt6,wgt7,wgt8,wgt9,wgt10,wgt11);
%macro percentiles2(ty);

PROC DATASETS LIBRARY=work   MEMTYPE=DATA    KILL nolist; 
QUIT;
proc datasets nolist;
copy in=temp3 out=work;
quit;
run;
PROC DATASETS LIBRARY=temp   MEMTYPE=DATA    KILL nolist; 
QUIT;

&sff		proc sql; *11411	Fin fishing, shell fishing, and other commercial fishing;
&sff		create table TEMP.&ty._FF as select 
&sff				"11411" as NAICS format=$20.,
&sff				&ty.Comm3.*
&sff			from &ty.Comm3;
&sff		quit;

&sph		proc sql; *488310	Port and harbor operations;
&sph		create table TEMP.&ty._PH as select
&sph				"488310" as NAICS format=$20.,
&sph				&ty.Utility3.*
&sph			from &ty.Utility3;
&sph		quit;

%macro loopit(in1,in2,top,out);
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete n &out;
QUIT;
%do j=1 %to &top;
data n;
	set &in2;
	if _N_=&j;
run;
data a&j;
	if _n_=1 then set n;
	set &in1;
run;
proc append base=&out  data=a&j;
run;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete a&j;
QUIT;
%end;
%mend;

&dd1.loopit(&ty.Pass3,&ty.Utility3			,&samp.,d1);
&dd1a.loopit(d1,&ty.Fbarge3					,&samp.,d1a);
&dd1b.loopit(d1,&ty.Tbarge3					,&samp.,d1b);
&dd7.loopit(d1,&ty.cruise3					,&samp.,d7);

&sip		data TEMP.&ty._IP;*483212	Inland waterways passenger transportation and Other water trans;
&sip			set d1;
&sip				format NAICS $20.0;
&sip					NAICS="483212";
&sip		run;

*delete d1;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d1;
QUIT;

&dd2.loopit(d1a,&ty.FShips3					,&samp.,d2);

*delete d1a;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d1a;
QUIT;

&dd6.loopit(d1b,&ty.rec3					,&samp.,d6);

*delete d1b;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d1b;
QUIT;

&sdp		data TEMP.&ty._DP;*483112/483114	Deep Sea/Coastal and Great Lakes Passenger;
&sdp			set d7;
&sdp				format NAICS $20.0;
&sdp					NAICS="483112/483114";
&sdp		run;

*delete d7;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d7;
QUIT;
&sss		data TEMP.&ty._SS;*487210	Scenic and Sightseeing Transportation, Water;
&sss			set d2;
&sss				format NAICS $20.0;
&sss					NAICS="487210";
&sss		run;

&dd3.loopit(d2,&ty.Tbarge3					,&samp.,d3);

*delete d2;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d2;
QUIT;

&dd9.loopit(d6,&ty.fSHIPS3					,&samp.,d9);
&dd10.loopit(d6,&ty.fbarge3					,&samp.,d10);

*delete d6;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d6;
QUIT;

&dd4.loopit(d3,&ty.COMM3					,&samp.,d4);
&dd8.loopit(d3,&ty.TSHIPS3					,&samp.,d8);

*delete d3;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d3;
QUIT;

&sot		data TEMP.&ty._OT;*488390	Other support activities for water transportation;
&sot			set d9;
&sot				format NAICS $20.0;
&sot					NAICS="488390";
&sot		run;

*delete d9;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d9;
QUIT;

&sns		data TEMP.&ty._NS;*488330	Navigational services to shipping and salvage;
&sns			set d10;
&sns				format NAICS $20.0;
&sns					NAICS="488330";
&sns		run;

*delete d10;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d10;
QUIT;

&smc	data TEMP.&ty._MC;*488320	Marine cargo handling;
&smc			set d4;
&smc				format NAICS $20.0;
&smc					NAICS="488320";
&smc		run;

&dd5.loopit(d4,&ty.TSHIPS3					,&samp.,d5);

*delete d4;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d4;
QUIT;

&sdf		data TEMP.&ty._DF;*483111/483113	Deep Sea/Coastal and Great Lakes Freight;
&sdf			set d8;
&sdf				format NAICS $20.0;
&sdf					NAICS="483111/483113";
&sdf		run;

*delete d8;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d8;
QUIT;

&sif		data TEMP.&ty._IF; *483211 Inland waterways freight transportation and towing transportation;
&sif			set d5;
&sif				format NAICS $20.0;
&sif					NAICS="488320";
&sif		run;

*delete d5;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete d5;
QUIT;

%mend percentiles2;
%percentiles2(&ptiles);


%macro percentiles3(ty);
PROC DATASETS LIBRARY=work   MEMTYPE=DATA    KILL nolist; 
QUIT;

proc sql;
		create table large2 as select
			large.*,
			n4.cs_percent,n4.cf_percent,n4.fb_percent,n4.fs_percent,n4.pv_percent,n4.rs_percent,
			n4.tb_percent,n4.ts_percent,n4.ut_percent
		from temp.&TY._&vt. as large left join temp2.n4 as n4 on
		n4.NAICS=large.NAICS;
quit;

data large3;
	set large2;
		cost=	sum(cs_percent*cs_cost,
				cf_percent*cf_cost,
				fb_percent*fb_cost,
				fs_percent*fs_cost,
				pv_percent*pv_cost,
				rs_percent*rs_cost,
				tb_percent*tb_cost,
				ts_percent*ts_cost,
				ut_percent*ut_cost);
run;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete large2;
QUIT;
proc sql;
	create table large3a as select
		large3.*,
			m.m1,m.m2,m.m3,m.m4,m.m5,m.m6,m.m7,m.m8,m.m9,m.m10,m.m11
		from large3 left join temp2.num as m on
		large3.NAICS=m.NAICS;
quit;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete large3;
QUIT;
data large3b;
	set large3a;
		array mult(11) m1-m11;
		array acost(11) acost1-acost11;
		DO I=1 TO 11;
			acost(i)=cost*mult(i);
		END;
run;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete large3a;
QUIT;
proc sql;
	create table large4 as select
		l.acost1,l.acost2,l.acost3,l.acost4,l.acost5,l.acost6,l.acost7,l.acost8,l.acost9,l.acost10,l.acost11,
		t.*
	from large3b as l left join temp2.th2 as t
	on t.NAICS=l.NAICS;
quit;
data temp4.large4;
	set large4;
run;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete large3b;
QUIT;

%macro reshapit;
%do l=1 %to 11;
data l&l (keep=Xweight acost);
	set large4;
		acost=acost&l;
		if		"&l"="1" then Xweight=&wgt1;
		else if "&l"="2" then Xweight=&wgt2;
		else if "&l"="3" then Xweight=&wgt3;
		else if "&l"="4" then Xweight=&wgt4;
		else if "&l"="5" then Xweight=&wgt5;
		else if "&l"="6" then Xweight=&wgt6;
		else if "&l"="7" then Xweight=&wgt7;
		else if "&l"="8" then Xweight=&wgt8;
		else if "&l"="9" then Xweight=&wgt9;
		else if "&l"="10" then Xweight=&wgt10;
		else if "&l"="11" then Xweight=&wgt11;
run;
%end;
%mend;
%reshapit;
data l;
	set l1 l2 l3 l4 l5 l6 l7 l8 l9 l10 l11;
run;

PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete l1 l2 l3 l4 l5 l6 l7 l8 l9 l10 l11;
QUIT;
	proc sort data=l; by acost; run;
proc univariate data=l noprint;
			weight Xweight;
			output out=temp4.&ptiles._&vt. PCTLPRE=P_ 
				PCTLPTS= 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
				Min=P_0 Max=P_100 MEAN=P_A;
			var acost;
run;

PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete l;
QUIT;

data large5;
	set large4;
		array D(11) D1-D11;
		array O(11) O1-O11;
		array T(11) T1-T11;
		array acost(11) acost1-acost11;
			do i=1 to 11;
				if T(i)=. and O(i)~=. then put "flag 1";
				if O(i)=. and T(i)~=. then put "flag 2";
				if T(i)=. then 				D(i)=.;
				else if	acost(i)>T(i) then 	D(i)=3;
				else if	acost(i)>O(i) then	D(i)=1;
				else 						D(i)=0;
			end;
run;

PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete large4;
QUIT;

%macro loopdum;
%do k=1 %to 11;
	proc sql;
			create table x as select
			NAICS,D&k ,
			count(NAICS) as Count&k
			from large5 group by NAICS,D&k;

			create table d&k as select
			NAICS,D&k as D,
			count&k/sum(count&k) as percent&k format=percent8.2
			from x group by NAICS order by NAICS,D;
	quit;
%end;
%mend;
%loopdum;

%macro loopdum2;
%do k=1 %to 11;
	proc sort data=large5 out=x1; by NAICS D&k; run;
	data x2;
		set x1 (keep=NAICS d&k);
			by NAICS D&k;
				if first.d&k then count&k=1;
					else count&k+1;
				if last.D&k;
	run;
	data max (keep=obs);
		set x1 end=last;
			obs=_n_;
			if last;
	run;
	data d&k (drop=count&k);
 		if _n_=1 then set max;
 		set x2;
			rename d&k=d;
			percent&k=count&k/obs;
 	run;
%end;
%mend;
*%loopdum2;
PROC DATASETS LIBRARY=work   MEMTYPE=DATA nolist; 
delete large5;
QUIT;
data temp.&ty.outdata_&vt.;
	merge d1 d2 d3 d4 d5 d6 d7 d8 d9 d10 d11;
	by NAICS D;
	if d=0 | d=1 | d=3;
run;
%export(temp.&ty.outdata_&vt.,&output.,&ty.thresholds_&vt..csv);
%export(temp4.&ptiles._&vt.,&output2.,&ptiles._&vt..csv);
*%export(temp4.&ptiles._&vt.,&output3.,&ptiles._&vt..csv);
%mend;
%percentiles3(&ptiles);
%mend;

%macro ptiles(ptiles);
%NAICS2(vt=DF,sdf= ,sff=*,sdp=*,sif=*,sip=*,sss=*,sph=*,smc=*,sns=*,sot=*,
dd1=%,dd1a=%,dd1b=*,dd2=%,dd3=%,dd4=*,dd5=*,dd6=*,dd7=*,dd8=%,dd9=*,dd10=*,
wgt1=0.049,wgt2=0.149,wgt3=0.116,wgt4=0.175,wgt5=0.175,wgt6=0.102,wgt7=0.114,wgt8=0.074,wgt9=0.047,wgt10=0,wgt11=0);

%NAICS2(vt=FF,sdf=*,sff= ,sdp=*,sif=*,sip=*,sss=*,sph=*,smc=*,sns=*,sot=*,
dd1=*,dd1a=*,dd1b=*,dd2=*,dd3=*,dd4=*,dd5=*,dd6=*,dd7=*,dd8=*,dd9=*,dd10=*,
wgt1=0.286,wgt2=0.481,wgt3=0.118,wgt4=0.089,wgt5=0.014,wgt6=0.012,wgt7=0,wgt8=0,wgt9=0,wgt10=0,wgt11=0);

%NAICS2(vt=DP,sdf=*,sff=*,sdp= ,sif=*,sip=*,sss=*,sph=*,smc=*,sns=*,sot=*,
dd1=%,dd1a=*,dd1b=*,dd2=*,dd3=*,dd4=*,dd5=*,dd6=*,dd7=%,dd8=*,dd9=*,dd10=*,
wgt1=0.134,wgt2=0.197,wgt3=0.142,wgt4=0.079,wgt5=0.205,wgt6=0.142,wgt7=0.102,wgt8=0,wgt9=0,wgt10=0,wgt11=0);

%NAICS2(vt=IF,sdf=*,sff=*,sdp=*,sif= ,sip=*,sss=*,sph=*,smc=*,sns=*,sot=*,
dd1=%,dd1a=%,dd1b=*,dd2=%,dd3=%,dd4=%,dd5=%,dd6=*,dd7=*,dd8=*,dd9=*,dd10=*,
wgt1=0.109,wgt2=0.126,wgt3=0.131,wgt4=0.262,wgt5=0.169,wgt6=0,wgt7=0.115,wgt8=0.087,wgt9=0,wgt10=0,wgt11=0);

%NAICS2(vt=IP,sdf=*,sff=*,sdp=*,sif=*,sip= ,sss=*,sph=*,smc=*,sns=*,sot=*,
dd1=%,dd1a=*,dd1b=*,dd2=*,dd3=*,dd4=*,dd5=*,dd6=*,dd7=*,dd8=*,dd9=*,dd10=*,
wgt1=0.121,wgt2=0.287,wgt3=0.21,wgt4=0.172,wgt5=0.146,wgt6=0.064,wgt7=0,wgt8=0,wgt9=0,wgt10=0,wgt11=0);

%NAICS2(vt=SS,sdf=*,sff=*,sdp=*,sif=*,sip=*,sss= ,sph=*,smc=*,sns=*,sot=*,
dd1=%,dd1a=%,dd1b=*,dd2=%,dd3=*,dd4=*,dd5=*,dd6=*,dd7=*,dd8=*,dd9=*,dd10=*,
wgt1=0.258,wgt2=0.273,wgt3=0.199,wgt4=0.127,wgt5=0.084,wgt6=0.039,wgt7=0.021,wgt8=0,wgt9=0,wgt10=0,wgt11=0);

%NAICS2(vt=PH,sdf=*,sff=*,sdp=*,sif=*,sip=*,sss=*,sph= ,smc=*,sns=*,sot=*,
dd1=*,dd1a=*,dd1b=*,dd2=*,dd3=*,dd4=*,dd5=*,dd6=*,dd7=*,dd8=*,dd9=*,dd10=*,
wgt1=0.098,wgt2=0.098,wgt3=0.154,wgt4=0.146,wgt5=0.203,wgt6=0.146,wgt7=0,wgt8=0.106,wgt9=0.024,wgt10=0.024,wgt11=0);

%NAICS2(vt=MC,sdf=*,sff=*,sdp=*,sif=*,sip=*,sss=*,sph=*,smc= ,sns=*,sot=*,
dd1=%,dd1a=%,dd1b=*,dd2=%,dd3=%,dd4=%,dd5=*,dd6=*,dd7=*,dd8=*,dd9=*,dd10=*,
wgt1=0.068,wgt2=0.127,wgt3=0.094,wgt4=0.143,wgt5=0.179,wgt6=0.134,wgt7=0.104,wgt8=0.078,wgt9=0.036,wgt10=0,wgt11=0);

%NAICS2(vt=NS,sdf=*,sff=*,sdp=*,sif=*,sip=*,sss=*,sph=*,smc=*,sns= ,sot=*,
dd1=%,dd1a=*,dd1b=%,dd2=*,dd3=*,dd4=*,dd5=*,dd6=%,dd7=*,dd8=*,dd9=*,dd10=%,
wgt1=0.085,wgt2=0.225,wgt3=0.234,wgt4=0.147,wgt5=0.135,wgt6=0.079,wgt7=0.056,wgt8=0.04,wgt9=0,wgt10=0,wgt11=0);

%NAICS2(vt=OT,sdf=*,sff=*,sdp=*,sif=*,sip=*,sss=*,sph=*,smc=*,sns=*,sot= ,
dd1=%,dd1a=*,dd1b=%,dd2=*,dd3=*,dd4=*,dd5=*,dd6=%,dd7=*,dd8=*,dd9=%,dd10=*,
wgt1=0.26,wgt2=0.28,wgt3=0.126,wgt4=0.167,wgt5=0.13,wgt6=0,wgt7=0,wgt8=0.037,wgt9=0,wgt10=0,wgt11=0);

%mend;
%ptiles(Low);
%ptiles(High);

/*
df 483111/483113	wgt1=0.049,wgt2=0.149,wgt3=0.116,wgt4=0.175,wgt5=0.175,wgt6=0.102,wgt7=0.114,wgt8=0.074,wgt9=0.047,wgt10=0,wgt11=0
ff 11411			wgt1=0.286,wgt2=0.481,wgt3=0.118,wgt4=0.089,wgt5=0.014,wgt6=0.012,wgt7=0,wgt8=0,wgt9=0,wgt10=0,wgt11=0
dp 483112/483114	wgt1=0.134,wgt2=0.197,wgt3=0.142,wgt4=0.079,wgt5=0.205,wgt6=0.142,wgt7=0.102,wgt8=0,wgt9=0,wgt10=0,wgt11=0
if 483211			wgt1=0.109,wgt2=0.126,wgt3=0.131,wgt4=0.262,wgt5=0.169,wgt6=0,wgt7=0.115,wgt8=0.087,wgt9=0,wgt10=0,wgt11=0
ip 483212			wgt1=0.121,wgt2=0.287,wgt3=0.21,wgt4=0.172,wgt5=0.146,wgt6=0.064,wgt7=0,wgt8=0,wgt9=0,wgt10=0,wgt11=0
ss 487210			wgt1=0.258,wgt2=0.273,wgt3=0.199,wgt4=0.127,wgt5=0.084,wgt6=0.039,wgt7=0.021,wgt8=0,wgt9=0,wgt10=0,wgt11=0
ph 488310			wgt1=0.098,wgt2=0.098,wgt3=0.154,wgt4=0.146,wgt5=0.203,wgt6=0.146,wgt7=0,wgt8=0.106,wgt9=0.024,wgt10=0.024,wgt11=0
mc 488320			wgt1=0.068,wgt2=0.127,wgt3=0.094,wgt4=0.143,wgt5=0.179,wgt6=0.134,wgt7=0.104,wgt8=0.078,wgt9=0.036,wgt10=0,wgt11=0
ns 488330			wgt1=0.085,wgt2=0.225,wgt3=0.234,wgt4=0.147,wgt5=0.135,wgt6=0.079,wgt7=0.056,wgt8=0.04,wgt9=0,wgt10=0,wgt11=0
ot 488390			wgt1=0.26,wgt2=0.28,wgt3=0.126,wgt4=0.167,wgt5=0.13,wgt6=0,wgt7=0,wgt8=0.037,wgt9=0,wgt10=0,wgt11=0
*/
