libname library "C:\Documents and Settings\Administrator\My Documents\MyProg\For website";

/*
The macro code creates several temporary datasets and subsequently deletes them.
Do not use this macro with datasets with names identical to those used by these macros.
*/

*===============================================================================;
*===============================================================================;

/*******************************************************************************
V_Skew_CDF macro: Calculating variance and probabilities of a skewed
                  distribution, using the CDF of 5 levels skewed distribution.

PURPOSE: The %V_Skew_CDF macro implements the ideas described in the document
         "Extending the Definition of a Slight Skew" (available in this website).
         The 5-level reference probabilities are turned into a piecewise-linear
         CDF on [0,5]; that interval is cut into N_LEV equal-width pieces and the
         CDF increment over each piece becomes the probability of one level.

USAGE: The macro input/output parameters are:
  N_LEV=      Desired number of item levels, the specified value can be >=2.
  I_PROBS=    Vector that contains the item levels probability vector of the
              5 levels skewed reference distribution. The 5 levels are assumed
              to be 1,2,3,4,5. It is substituted into PROC IML, so it must be
              an IML matrix literal.
              Default value for i_probs is: {0.05 0.15 0.2 0.35 0.25}.
  NEW_P_OUT=  Output dataset name with calculated probabilities
              (one observation, columns COL1-COLn).
  V_REF=      Output dataset name with calculated reference distribution
              variance (one observation, column COL1).

LIMITATIONS: No error checking is done. The macro assumes that:
  - dataset name entered for V_REF= is valid.
  - sum of I_PROBS=1.
  - I_PROBS contains 5 values, each between 0 to 1.

EXAMPLE: This example calculates the variance and the probability vector of a
         3 levels item derived from the default slightly skewed 5 levels
         reference distribution, using the cdf method.

  %V_Skew_CDF(n_lev=3,v_ref=v5_cdf,new_p_out=new_p_out);

OUTPUT EXAMPLE:

              NULL VARIANCE INFO

     VARIANCE              MEAN               N_LEV
  NULL VARIANCE     MEAN OF NULL DIST    NUMBER OF LEVELS
      0.522               2.333               3.000

                    NEW_P
              PROBABILITY VECTOR
        0.1500      0.3667      0.4833
************************************************************************/
%macro V_Skew_CDF(n_lev=,                              /* Desired number of item levels */
                  i_probs={0.05 0.15 0.2 0.35 0.25},   /* 5 levels probability vector */
                  new_p_out=,                          /* new probability output dataset name */
                  v_ref=);                             /* variance output dataset name */

  proc iml;
    cp = cusum(&i_probs);      /* cumulative item level probabilities */
    n_lev = &n_lev;

    /* Define the CDF: linear interpolation through the points
       (0,0), (1,cp[1]), (2,cp[2]), ..., (5,cp[5]).
       The argument is clamped into [0,5] first. Without the clamp every
       interval indicator below is false for t > 5 and the function would
       return 0; the last cut point n_lev*(5/n_lev) is computed in floating
       point and may land marginally above 5. */
    start f5_cdf(t, cp);
      tc = (t <> 0) >< 5;      /* element-wise max with 0, then min with 5 */
      v = cp[1]*tc*(tc<=1)
        + ( cp[1]+(cp[2]-cp[1])*(tc-1) ) * ((tc>1) & (tc<=2))
        + ( cp[2]+(cp[3]-cp[2])*(tc-2) ) * ((tc>2) & (tc<=3))
        + ( cp[3]+(cp[4]-cp[3])*(tc-3) ) * ((tc>3) & (tc<=4))
        + ( cp[4]+(cp[5]-cp[4])*(tc-4) ) * ((tc>4) & (tc<=5));
      return(v);
    finish f5_cdf;

    /* Calculate new probability vector of item with n_lev levels:
       each level receives the CDF mass of one slice of width 5/n_lev */
    new_p = j(1,n_lev);
    i_lev = 1:n_lev;
    delt  = 5.00/n_lev;
    new_p[1] = f5_cdf(delt,cp);
    do i=2 to n_lev;
      new_p[i] = f5_cdf(i*delt,cp) - f5_cdf((i-1)*delt,cp);
    end;

    /* Calculate expected value and variance of item with n_lev levels */
    mean = i_lev*t(new_p);
    variance = ( (i_lev-j(1,n_lev,mean))#(i_lev-j(1,n_lev,mean)) )*t(new_p);

    print "NULL VARIANCE INFO" ,,
          variance[format=f8.3 colname=" NULL VARIANCE"]
          mean[format=f8.3 colname=" MEAN OF NULL DIST"]
          n_lev [format=f8.3 colname=" NUMBER OF LEVELS"]
          new_p [format=f8.4 colname=" PROBABILITY VECTOR"];

    create &v_ref from variance;      /* write out the variance to sas dataset */
    append from variance;
    create &new_p_out from new_p;     /* write out new_p to sas dataset */
    append from new_p;
  quit;
%mend V_Skew_CDF;

*===============================================================================;
*===============================================================================;

/********************************************************************************
USER_PROB macro: Calculating variance based on the distribution
and the number of item levels specified by the user. PURPOSE: Calculates the null variance based on a distribution specified by the user, for the number of levels specified by the user. USAGE: The macro input/output parameters are: N_LEV= Desired number of item levels. I_PROBS= Vector that contains the item levels probability vector of the number of levels specified by the user. NEW_P_OUT= Output dataset name with probabilities. V_REF= Output dataset name with calculated distribution variance. LIMITATIONS: No error checking is done. The macro assumes that: - dataset name entered for V_REF= is valid. - sum of I_PROBS=1. - The number of values specified in I_PROBS is the same as the number levels specified in N_LEV. - Each value specified in I_PROBS is between 0 to 1. EXAMPLE: This example calculates the variance of 5 levels item with slightly skewed probability distribution, using the cdf method. %USER_PROB (n_lev=7, v_ref=vnull, new_p_out=pout, i_probs= {0.05 0.10 0.15 0.20 0.25 0.15 0.10}); OUTPUT EXAMPLE: NULL VARIANCE INFO VARIANCE MEAN NULL VARIANCE MEAN OF NULL DIST 2.628 4.350 **************************************************************************************/ %macro USER_PROB(n_lev=, /* Desired number of item levels*/ i_probs=, /* Probability vector */ new_p_out=, /* probability output dataset name */ v_ref= ); /* Variance output dataset name */ proc iml; i_probs=&i_probs; n_lev=&n_lev; i_lev= 1:&n_lev ; /* Calculate expected value and variance of item with n_lev levels */ mean=i_lev*t(&i_probs); variance=( (i_lev-j(1,n_lev,mean))#(i_lev-j(1,n_lev,mean)) )*t(&i_probs); print "NULL VARIANCE INFO" ,, variance[format=f8.3 colname=" NULL VARIANCE"] mean[format=f8.3 colname=" MEAN OF NULL DIST"]; create &v_ref from variance; /* write out rho_z to sas dataset */ append from variance; create &new_p_out from i_probs; /* write out probability vector to sas dataset */ append from i_probs; quit; %mend USER_PROB; 
*===============================================================================;
*===============================================================================;

/********************************************************************************
RWG macro: Calculating RWG value for given input parameters.

REQUIRES: BASE SAS and SAS/STAT Software and SAS/IML Software,
          Version 8.0 TSM0 or later.

USAGE: The macro input/output parameters are:
  DATA=            Input SAS dataset name.
  DATA_OUT=        Output SAS dataset name.
  GROUP=           Group identification variable.
  ITEMS_LIST=      List of item names.
  N_ITEMS=         Number of items in the scale.
  N_LEV=           Number of levels in items' scale.
  NULL_TYPE=       Type of null distribution of item.
                   If NULL_TYPE=USERN then the null distribution is defined by
                   the user and the probability vector of this distribution
                   should be supplied in the NULL_USERN_PROB vector.
                   If NULL_TYPE=USER then the null variance is calculated based
                   on the basic reference 5 levels null distribution which is
                   defined by the user. The probabilities of this basic 5 levels
                   null distribution should be supplied in the NULL_USER_PROB
                   vector.
                   If NULL_TYPE=SSK (=Slightly Skewed) then the basic
                   probability vector for the null distribution is:
                   {0.05 0.15 0.2 0.35 0.25}.
                   If NULL_TYPE=UN (=Uniform) then the probability vector for
                   the null distribution is: 1/n_lev for every level.
  NULL_USER_PROB=  5 elements vector, that contains the item levels basic
                   probability vector for null distribution, given by the user.
                   The 5 levels are assumed to be 1,2,3,4,5. It should be
                   written in braces, according to the following example:
                   {0.05 0.15 0.2 0.35 0.25}.
  NULL_USERN_PROB= vector with number of elements equal to the number of item
                   levels that contains the item levels probabilities for null
                   distribution. It should be written according to the
                   following example (for item levels=7):
                   {0.05 0.10 0.15 0.20 0.25 0.15 0.10}.
EXAMPLE: This example calculates RWG for the task significance scale which is
         included in the data of Bliese, Halverson & Schriesheim (2002),
         pertaining to a sample of 2042 U.S Army soldiers in 49 U.S. Army
         Companies. These data are included in the Multilevel Modeling Package
         of "R", and can be accessed by users (Bliese, 2006).

  %RWG(data=newlq_task, data_out=RWG, group=compid, items_list=tsig01-tsig03,
       n_items=3, N_LEV=5, null_type=UN);

OUTPUT EXAMPLE:

              NULL VARIANCE INFO

     VARIANCE              MEAN               N_LEV
  NULL VARIANCE     MEAN OF NULL DIST    NUMBER OF LEVELS
      2.000               3.000               5.000

                          NEW_P
                    PROBABILITY VECTOR
     0.2000    0.2000    0.2000    0.2000    0.2000

                    The MEANS Procedure

                  Analysis Variable : rwg

   N     Minimum        Mean      Median     Maximum    Variance
  ----------------------------------------------------------------
  49           0   0.5847126   0.6065520   0.8195384   0.0260843
  ----------------------------------------------------------------
************************************************************************/
%macro RWG (data=,               /* Location SAS Dataset Name */
            data_out=,           /* Name of output SAS dataset */
            group=,              /* Group identification number */
            items_list=,         /* List of items */
            n_items=,            /* Number of items in the scale */
            n_lev=,              /* Number of levels in items */
            null_type=,          /* Type of null distribution */
            null_user_prob=,     /* User defined 5 levels basic probability vector */
            null_usern_prob=);   /* User defined probability vector */

  /* Compare the requested type case-insensitively so that null_type=ssk
     is accepted as well as null_type=SSK. */
  %local _ntype;
  %let _ntype = %upcase(&null_type);

  /* Build WORK.V_NULL (null variance) and WORK._P_NULL_ (null probabilities). */
  %if &_ntype=SSK %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs={0.05 0.15 0.2 0.35 0.25},
                v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %if &_ntype=UN %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs={0.2 0.2 0.2 0.2 0.2},
                v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %if &_ntype=USER %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs=&null_user_prob,
                v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %if &_ntype=USERN %then %do;
    %USER_PROB(n_lev=&n_lev, i_probs=&null_usern_prob,
               v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %do;
    /* Without this branch the macro would continue and fail later on a
       missing WORK.V_NULL, which hides the real cause. */
    %put ERROR: RWG: NULL_TYPE=&null_type is not recognised. Expected SSK, UN, USER or USERN.;
    %goto exit;
  %end;

  /* Store variance of null distribution as macro variable. The value is
     formatted explicitly: an implicit numeric-to-character conversion in
     CALL SYMPUT uses BEST12. and drops digits. */
  data _null_;
    set v_null;
    call symput('v_null_n', trim(left(put(col1, best32.))));
  run;

  /* calculating RWG */
  proc sort data=&data out=_in_dat_;
    by &group;
  run;

  /* per-group variance of every item -> var1..var&n_items */
  proc means data=_in_dat_ noprint;
    var &items_list;
    output out=_vars_ var=var1-var&n_items;
    by &group;
  run;

  /* rwg = J(1-r) / (J(1-r) + r), r = mean observed variance / null variance;
     set to 0 when the observed variance exceeds the null variance */
  data &data_out;
    set _vars_;
    meanvar = mean(of var1-var&n_items);
    v_ratio = meanvar/&v_null_n;
    if v_ratio>1 then rwg=0;
    else rwg = &n_items*(1-v_ratio)/(&n_items*(1-v_ratio)+v_ratio);
    keep &group rwg;
  run;

  /* provides summary statistics of RWG distribution across data */
  proc means N min mean median max var data= &data_out;
    var rwg;
  run;

%exit:
%mend RWG;

*===============================================================================;
*===============================================================================;

/********************************************************************************
MVNRwg macro: Calculating RWG critical value.

PURPOSE: This macro provides the .95 quantile of the RWG distribution for given
         number of items, group size, the correlation between items and null
         distribution.

REQUIRES: BASE SAS and SAS/STAT Software and SAS/IML Software,
          Version 8.0 TSM0 or later.

USAGE: The macro input/output parameters are:
  CORR_IN=   SAS data set which contains the correlation matrix. The macro
             expects m variables and m observations in the data set, where m is
             the number of items to be generated for each observation.
  G=         Number of observations to be generated in each sample (number of
             group members).
  N_SAMP=    Number of samples for each IML running (the default is 1000).
  NN_SAMP=   Number of IML running (the default is 100). The total number of
             samples for each macro running is N_SAMP*NN_SAMP
  SEED=      Seed value for the random number generator. Default value is 0,
             which will use the computer clock to initialize the random number
             generator (in which case the results are not replicable).
  N_LEV=     Number of levels in items' scale.
  N_ITEMS=   Number of items.
  SAMPLE=    SAS data set name for the resulting multivariate items data +
             last variable which is sample number.
             The variable names will be I1-Im for the items columns and Nsmp
             for the last column which contains the sample number.
  I_GROUP=   Name of group identification indicator (variable).
  ITEM_LIST= Items list.
  SAMP_TYPE= Type of sample distribution of item.
             If SAMP_TYPE=USERN then the sample distribution is defined by the
             user and the probability vector of this distribution should be
             supplied in the SAMP_USERN_PROB vector.
             If SAMP_TYPE=USER then the probability vector for the basic
             5 levels sample distribution is defined by the user. The
             probabilities of this reference 5 levels distribution should be
             supplied in the SAMP_USER_PROB vector.
             If SAMP_TYPE=SSK (=Slightly Skewed) then the basic probability
             vector for the sample distribution is: {0.05 0.15 0.2 0.35 0.25}.
             If SAMP_TYPE=UN (=Uniform) then the probability vector for the
             sample distribution is: 1/n_lev for every level.
  SAMP_USER_PROB=  5 elements vector, that contains the item levels basic
             probability vector for sample distribution, given by the user.
             The 5 levels are assumed to be 1,2,3,4,5. It should be written in
             braces, according to the following example:
             {0.05 0.15 0.2 0.35 0.25}.
  SAMP_USERN_PROB= vector with number of elements equal to the number of item
             levels that contains the item levels probabilities for sample
             distribution. It should be written according to the following
             example (for item levels=7): {0.05 0.10 0.15 0.20 0.25 0.15 0.10}.
  NULL_TYPE= Type of null distribution of item.
             If NULL_TYPE=USERN then the null distribution is defined by the
             user and the probability vector of this distribution should be
             supplied in the NULL_USERN_PROB vector.
             If NULL_TYPE=USER then the null variance is calculated based on
             the basic reference 5 levels null distribution which is defined by
             the user. The probabilities of this basic 5 levels null
             distribution should be supplied in the NULL_USER_PROB vector.
             If NULL_TYPE=SSK (=Slightly Skewed) then the basic probability
             vector for the null distribution is: {0.05 0.15 0.2 0.35 0.25}.
             If NULL_TYPE=UN (=Uniform) then the probability vector for the
             null distribution is: 1/n_lev for every level.
  NULL_USER_PROB=  5 elements vector, that contains the item levels basic
             probability vector for null distribution, given by the user.
             The 5 levels are assumed to be 1,2,3,4,5. It should be written in
             braces, according to the following example:
             {0.05 0.15 0.2 0.35 0.25}.
  NULL_USERN_PROB= vector with number of elements equal to the number of item
             levels that contains the item levels probabilities for null
             distribution. It should be written according to the following
             example (for item levels=7): {0.05 0.10 0.15 0.20 0.25 0.15 0.10}.
  RWG_OUT=   RWG output dataset name.

LIMITATIONS: The macro assumes that:
  - Dataset names entered are valid, and exist in the case of the CORR_IN=
    option.
  - Number of columns (= number of rows) of CORR_IN = Number of Items
    specified in N_ITEMS= option.
  - Length of NULL_USER_PROB=5 and sum of NULL_USER_PROB=1.
  - Length of NULL_USERN_PROB vector = Number of item levels specified in
    n_lev and sum of NULL_USERN_PROB=1.

EXAMPLE: This example provides the .95 quantile of the RWG distribution of a
         group with 10 observations (group size=10), for a 5-item scale, with
         5 levels skewed probability distribution, for a skewed null
         distribution and compound symmetry correlation matrix between items
         with correlation coefficient equal to 0.4. The quantile is calculated
         by simulating 100,000 samples.
* Store the correlation matrix in a data set;
  data corr_in;
    input m1-m5;
    cards;
  1.0 0.4 0.4 0.4 0.4
  0.4 1.0 0.4 0.4 0.4
  0.4 0.4 1.0 0.4 0.4
  0.4 0.4 0.4 1.0 0.4
  0.4 0.4 0.4 0.4 1.0
  ;
  run;

  %MVNRwg(CORR_IN=corr_in, G=10, N_SAMP =1000, nn_samp=100, SEED =123,
          n_lev=5, n_items=5, SAMPLE =sample, I_group=nsmp,
          item_list=I1 I2 I3 I4 I5, null_type=SSK, samp_type=SSK, rwg_out=rwg);

OUTPUT EXAMPLE:

              NULL VARIANCE INFO

     VARIANCE              MEAN               N_LEV
  NULL VARIANCE     MEAN OF NULL DIST    NUMBER OF LEVELS
      1.340               3.600               5.000

                          NEW_P
                    PROBABILITY VECTOR
     0.0500    0.1500    0.2000    0.3500    0.2500

     .95 percentile of RWG distribution

          Obs      rwg
            1    0.73196
***************************************************************************/
%macro MVNRwg(corr_in=,           /* Dataset for correlation matrix */
              G=,                 /* Number of group members */
              n_samp=1000,        /* Number of samples for IML (documented default) */
              nn_samp=100,        /* Number of IML running (documented default) */
              seed=0,             /* Seed for random number generator */
              n_lev=,             /* Number of item levels (A) */
              n_items=,           /* Number of items (J) */
              sample=,            /* Output dataset name */
              I_group=nsmp,       /* Group variable */
              item_list=,         /* Items list */
              null_type=,         /* Type of null distribution */
              samp_type=,         /* sample distribution type */
              samp_user_prob=,    /* User defined 5 levels basic sample distribution probability vector */
              samp_usern_prob=,   /* User defined sample distribution probability vector */
              null_user_prob=,    /* User defined 5 levels basic probability vector for null distribution */
              null_usern_prob=,   /* User defined probability vector for null distribution */
              rwg_out=);          /* RWG output dataset name */

  /* NOTE(review): rwg_out= is accepted but never referenced below; the result
     is written to A<n_lev>_J<n_items>_<G>. Confirm which dataset it was meant
     to name before wiring it in. */

  %local _ntype _stype j;
  %let _ntype = %upcase(&null_type);   /* case-insensitive type matching */
  %let _stype = %upcase(&samp_type);

  /* ---- null distribution: WORK.V_NULL and WORK._P_NULL_ ---- */
  %if &_ntype=SSK %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs={0.05 0.15 0.2 0.35 0.25},
                v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %if &_ntype=UN %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs={0.2 0.2 0.2 0.2 0.2},
                v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %if &_ntype=USER %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs=&null_user_prob,
                v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %if &_ntype=USERN %then %do;
    %USER_PROB(n_lev=&n_lev, i_probs=&null_usern_prob,
               v_ref=v_null, new_p_out=_p_null_)
  %end;
  %else %do;
    %put ERROR: MVNRwg: NULL_TYPE=&null_type is not recognised. Expected SSK, UN, USER or USERN.;
    %goto exit;
  %end;

  /* Null variance as a macro variable, formatted explicitly because the
     implicit conversion in CALL SYMPUT uses BEST12. and drops digits. */
  data _null_;
    set V_null;
    call symput('v_null_n', trim(left(put(col1, best32.))));
  run;

  /* ---- sample distribution: WORK.PSAMP (WORK.VSAMP_ is produced as a
          by-product and not used further in this macro) ---- */
  %if &_stype=SSK %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs={0.05 0.15 0.2 0.35 0.25},
                v_ref=Vsamp_, new_p_out=Psamp)
  %end;
  %else %if &_stype=UN %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs={0.2 0.2 0.2 0.2 0.2},
                v_ref=Vsamp_, new_p_out=Psamp)
  %end;
  %else %if &_stype=USER %then %do;
    %V_Skew_CDF(n_lev=&n_lev, i_probs=&samp_user_prob,
                v_ref=Vsamp_, new_p_out=Psamp)
  %end;
  %else %if &_stype=USERN %then %do;
    %USER_PROB(n_lev=&n_lev, i_probs=&samp_usern_prob,
               v_ref=Vsamp_, new_p_out=Psamp)
  %end;
  %else %do;
    %put ERROR: MVNRwg: SAMP_TYPE=&samp_type is not recognised. Expected SSK, UN, USER or USERN.;
    %goto exit;
  %end;

  /* transpose p vector: one row of COL1-COLn becomes n rows of COL1 */
  proc transpose data=Psamp out=_p_sampT_;
  run;
  data Psamp;
    set _p_sampT_;
    drop _name_;
  run;

  data par;
    n_items=&n_items;
    n_tot =&G*&n_samp*&nn_samp;   /* calculates total number of observations */
    call symput ("n_items2",n_items);
    call symput ("n_tot",n_tot);
  run;

  /* 1. Generate the multivariate normal data
        (independent standard normals z1..zJ, row counter i kept for slicing) */
  data random;
    array z(&n_items2);
    call streaminit(&seed);
    do i=1 to &n_tot;
      do j=1 to &n_items2;
        z(j)=rand('normal');
      end;
      output;
    end;
    drop j;
  run;

  /* A base table left behind by an earlier aborted run would be appended to
     silently and distort the percentile, so remove it first. NOWARN keeps the
     step quiet when the table does not exist. */
  proc datasets library=work nolist nowarn;
    delete rwg_&n_lev._&n_items._&G;
  run;
  quit;

  /* 2. Transformation of created multivariate normal data according to
        pre-specified correlation matrix and creation discrete item values
        according to the quantiles of pre-specified probabilities */

  proc printto log=nolog; run;   /* suppresses the SAS Log printing for the following loop */

  %do j=1 %to &nn_samp;   /* this loop is needed due to IML limitation for matrix size */

    /* slice j of the normal draws; keep every item column
       (was hard-coded to z1-z5, which only worked for n_items=5) */
    data z(keep=z1-z&n_items);
      set random;
      n=&G*&n_samp;   /* calculates number of observations for IML loop */
      if n*(&j -1)+1<=i<=n*&j ;
    run;

    proc iml worksize=100;
      use &corr_in;                 /* read correlation matrix */
      read all into corr;
      use Psamp;                    /* read probabilities */
      read all into i_prob;
      use z;                        /* read multivariate normal data */
      read all into z;

      tz=t(z);                      /* matrix transpose */
      v=nrow(corr);                 /* calculate number of items */
      v_prob=nrow(i_prob);          /* calculate number of item levels */
      v_prob_1=v_prob-1;

      l=t(root(corr));              /* calculate cholesky root of cor matrix */
      x=l*tz;                       /* premultiply by cholesky root */
      tx=t(x);

      /* begin cutting */
      c_prob=cusum(i_prob);                         /* cumulative item level probabilities */
      q_item=probit(c_prob[1:(v_prob_1),1]);        /* calculate normal quantiles */
      txI=(tx>q_item[v_prob_1])*v_prob;             /* create highest item level */
      txI=txI+(tx<=q_item[1]);                      /* create lowest item level, which is assumed to be 1 */
      do i=2 to v_prob_1;                           /* create all other item levels */
        txI=txI+( (tx>q_item[i-1])&(tx<=q_item[i]) )*i;
      end;

      /* sample number: each of 1..n_samp repeated G times, in order */
      samp_num=shape(t(shape(1:&n_samp,&G,&n_samp)),1);
      txI=txI||t(samp_num);                             /* add sample number */
      t_nam=concat('I',compress(char(1:v)))||'Nsmp';    /* create variable names */

      create &sample._&G._&j from txI [colname=t_nam];  /* write out sample data+sample number to sas dataset */
      append from txI;
    quit;

    /* Calculating RWG */
    proc sort data=&sample._&G._&j out=_tmp_in_dat_;
      by &I_group;
    run;

    proc means data=_tmp_in_dat_ noprint;   /* create variances */
      var &item_list;
      output out=_tmpV_ var=var1-var&n_items;
      by &I_group;
    run;

    data rwg_&n_lev._&n_items._&G._&j;
      set _tmpV_;
      meanvar=mean(of var1-var&n_items);
      v_ratio=meanvar/&v_null_n;
      if v_ratio>1 then rwg=0;
      else rwg =&n_items*(1-v_ratio)/(&n_items*(1-v_ratio)+v_ratio);
      keep &I_group rwg;
    run;

    proc append base=rwg_&n_lev._&n_items._&G data=rwg_&n_lev._&n_items._&G._&j;
    run;

    proc datasets library=work nolist;
      delete rwg_&n_lev._&n_items._&G._&j &sample._&G._&j _tmp_in_dat_ _tmpv_ z;
    run;
    quit;
  %end;

  proc printto log=log; run;   /* restore normal log output */

  /* Calculating the 95th percentile for RWG */
  proc freq data=rwg_&n_lev._&n_items._&G noprint;
    tables rwg / out=freq_&n_lev._&n_items._&G outcum;
  run;

  /* Flag the distinct RWG value that directly follows the first value whose
     cumulative percent reaches 95: previous row >= 95, row before that < 95. */
  data freq_&n_lev._&n_items._&G;
    set freq_&n_lev._&n_items._&G;
    pct_lag=lag1(cum_pct);
    pct_lag2=lag2(cum_pct);
    if cum_pct>95 and pct_lag>=95 and pct_lag2<95 then rwgc=rwg;
  run;

  data A&n_lev._J&n_items._&G;
    set freq_&n_lev._&n_items._&G;
    keep rwg;
    where rwgc^=.;
  run;

  proc print data= A&n_lev._J&n_items._&G;
    var RWG;
    title '.95 percentile of RWG distribution';
  run;

  proc datasets library=work nolist;
    delete freq_&n_lev._&n_items._&G rwg_&n_lev._&n_items._&G par random;
  run;
  quit;

  title ;

%exit:
%mend MVNRwg;