CSLDOCs.txt by jda, 4-10-00

**********************************************************************************

attached files:

SAVE3.TXT = raw CSL coding
IMPUTE3.LST = original, imputed, and imputed-and-expanded data (see the page numbers in the log)

variable list:

MIN = compulsory attendance minimum
MAX = compulsory attendance maximum
MINEXPT = minimum schooling to drop out
MINAWORK = minimum age to work
MINEWORK = minimum education to work

The PUT file SAVE3.TXT is the raw data on the laws, with missing-value codes assigned:

"M" = missing
"NR" = not restricted.

Text file IMPUTE3.LST is the result of eliminating 
missing data by assigning data for older cohorts 
and expanding the file to cover missing years, as described 
in the appendix to NBER WP 7444.

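These data are used to code CL and CA. In the code below, CA is the larger of
(MAX - MIN) and MINEXPT, and CL is the larger of (MINAWORK - MIN) and MINEWORK;
"NR" values of MINEXPT, MINAWORK and MINEWORK are first recoded to 0, and when
MIN is "NR" (or, for CA, when MAX is "NR") the age difference is replaced by 0.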

**********************************************************************************

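Code for CL and CA
(excerpt; the leading numbers appear to be SAS log line numbers, and gaps in
them indicate lines omitted from the excerpt)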


23         
24         data two; set raw.impute3;
25         **************************************;
26         ***  creating the CSL instruments ****;
27         **************************************;
28         
29         length bplg yearat14 enrolage drop_age req_sch work_age work_sch ca cl 3;
30         
31         yearat14=year*1;
32         bplg=birthpl;
33         
34         if minexpt='NR' or minexpt='NR(?)' then minexpt='0';
35         
36         if minawork='NR' or minawork='NR(?)' then minawork='0';
37         if minework='NR' or minework='NR(?)' then minework='0';
43         
44
45         req_sch=minexpt*1;
46         work_age=minawork*1;
47         work_sch=minework*1;
48         
49         if ((max='NR') or (min='NR')) then ca=max(0, req_sch);
50         if ((max ne 'NR') and (min ne 'NR')) then do;
51          enrolage=min*1;
52          drop_age=max*1;
53          ca=max(drop_age-enrolage, req_sch);
54         end;
55         
56         if min='NR' then cl=max(0, work_sch);
57         if (min ne 'NR') then do;
58          enrolage=min*1;
59          drop_age=max*1;
60          cl=max(work_age-enrolage, work_sch);
61         end;
62         
72         
73         ca8=(ca<=8);
74         ca9=(ca=9);
75         ca10=(ca=10);
76         ca11=(ca>=11);
77         
78         cl6=(cl<=6);
79         cl7=(cl=7);
80         cl8=(cl=8);
81         cl9=(cl>=9);



***********************************************************************************
Program to impute and expand the raw CSL coding
***********************************************************************************

1                                                          The SAS System                           17:38 Thursday, January 21, 1999

1          /* impute3.sas reads updated xng file -- revised by JDA 1-21-99
2          
3             reads a currently FLAT file (see impute1 for rectangularization
4          
5             imputes missing, and expands for inter-source years
6          
7             INFILE csl3.txt attendance data
8             JDA/XNG 1-21-99 /angrist/ipums/csl/raw
9          
10            notes on 12-21-98 (corrected at 1-21
11                               plus data added for 72 and 78
12                 missing dc
13                 check sporadic NRs
14                 non-matches to AK-91 appendix: ME 70, NM 60, ND 60, WI 70
15         
16                 added FIPSA codes from 80 codebook */
17         
18         options ls=78 nocenter ;
19         
20         libname save '/angrist/ipums/csl/data';
NOTE: Libref SAVE was successfully assigned as follows: 
      Engine:        V612 
      Physical Name: /angrist/ipums/csl/data
21         filename savedat '/angrist/ipums/csl/raw/save3.txt';
22         
23         ********************************;
24         **** flatten the excel file ****;
25         ********************************;
26         
27         data one;
28          infile '/angrist/ipums/csl/raw/csl3.txt' missover;
29          file savedat;
30          missing M;
31         
32          input pob $ pobfip year min $ max $ minexpt $ minawork $ minework 
$;
33          put pob $ pobfip year min $ max $ minexpt $ minawork $ minework $;
34         
2 The SAS System                              17:38 Thursday, January 21, 1999

35         

NOTE: The infile '/angrist/ipums/csl/raw/csl3.txt' is:
      File Name=/angrist/ipums/csl/raw/csl3.txt,
      Owner Name=angrist,Group Name=mit,
      Access Permission=rw-------,
      File Size (bytes)=46925

NOTE: The file SAVEDAT is:
      File Name=/angrist/ipums/csl/raw/save3.txt,
      Owner Name=angrist,Group Name=mit,
      Access Permission=rw-------

NOTE: 686 records were read from the infile '/angrist/ipums/csl/raw/csl3.txt'.
      The minimum record length was 67.
      The maximum record length was 68.
NOTE: 686 records were written to the file SAVEDAT.
      The minimum record length was 20.
      The maximum record length was 31.
NOTE: The data set WORK.ONE has 686 observations and 8 variables.
NOTE: DATA statement used:
      real time           0.450 seconds
      cpu time            0.081 seconds
      

36         proc print data=one;
37         title 'original data set';
38         
39         ***********************************************;
40         ***************** imputation ******************;
41         ***********************************************;
42         

NOTE: The PROCEDURE PRINT printed pages 1-13.
NOTE: PROCEDURE PRINT used:
      real time           0.050 seconds
      cpu time            0.041 seconds
      

43         data two;
44         
45          retain;
46         
47          length pob $ 16;
48         
49          infile savedat missover;
50          input pob $ pobfip year t_min $ t_max $ t_mnexpt $ t_mnawrk $ 
t_mnewrk $;
51         
52          if t_min ne 'M' then min=t_min;
53          if t_max ne 'M' then max=t_max;
54          if t_mnexpt ne 'M' then minexpt=t_mnexpt;
55          if t_mnawrk ne 'M' then minawork=t_mnawrk;
56          if t_mnewrk ne 'M' then minework=t_mnewrk;
57         
58         drop t_min t_max t_mnexpt t_mnawrk t_mnewrk;
59         
3 The SAS System                              17:38 Thursday, January 21, 1999

60         run;

NOTE: The infile SAVEDAT is:
      File Name=/angrist/ipums/csl/raw/save3.txt,
      Owner Name=angrist,Group Name=mit,
      Access Permission=rw-------,
      File Size (bytes)=19658

NOTE: 686 records were read from the infile SAVEDAT.
      The minimum record length was 20.
      The maximum record length was 31.
NOTE: The data set WORK.TWO has 686 observations and 8 variables.
NOTE: DATA statement used:
      real time           0.230 seconds
      cpu time            0.045 seconds
      

61         
62         proc print data=two;
63         title 'imputed data set';
64         
65         ************************************************;
66         **** expand data set to cover missing years ****;
67         ************************************************;
68         

NOTE: The PROCEDURE PRINT printed pages 14-26.
NOTE: PROCEDURE PRINT used:
      real time           0.040 seconds
      cpu time            0.037 seconds
      

69         proc sort;
70          by pobfip descending year;
71         

NOTE: The data set WORK.TWO has 686 observations and 8 variables.
NOTE: PROCEDURE SORT used:
      real time           0.200 seconds
      cpu time            0.020 seconds
      

72         data three;
73          set two;
74         
75          nextyear=lag(year);
76          if year=1978 then nextyear=1979;
77         

NOTE: The data set WORK.THREE has 686 observations and 9 variables.
NOTE: DATA statement used:
      real time           0.210 seconds
      cpu time            0.026 seconds
      

78         proc sort;
79          by pobfip year;
4 The SAS System                              17:38 Thursday, January 21, 1999

80         

NOTE: The data set WORK.THREE has 686 observations and 9 variables.
NOTE: PROCEDURE SORT used:
      real time           0.210 seconds
      cpu time            0.019 seconds
      

81         proc print data=three;
82         
83         

NOTE: The PROCEDURE PRINT printed pages 27-39.
NOTE: PROCEDURE PRINT used:
      real time           0.040 seconds
      cpu time            0.041 seconds
      

84         data save.impute3;
85          set three;
86         
87          nnext=(nextyear-1)/1;
88          nyear=year;
89          birthpl=pobfip;
90         
91          do j=nyear to nnext;
92           year=j;
93           drop nyear nnext j pobfip;
94           output;
95           end;
96         

NOTE: The data set SAVE.IMPUTE3 has 3185 observations and 9 variables.
NOTE: DATA statement used:
      real time           0.260 seconds
      cpu time            0.041 seconds
      

97         proc print data=save.impute3;
98         title 'expanded data set to be matched to census';
99         
100        run;

NOTE: The PROCEDURE PRINT printed pages 40-97.