*** LIBRARY THAT HOLDS THE SOURCE DATA SET;
libname a 'f:\sasclass\sasdata8';

*** DESCRIBE THE SOURCE DATA SET;
proc contents data = a.class01;
run;

*** FREQUENCY TABLES FOR EVERY SOURCE VARIABLE EXCEPT ID;
proc freq data = a.class01 (drop = id);
run;

*** BUILD THE CLEANED DATA SET NAMED NEW;
data new;

    *** DECLARE BOTH METHOD OF DELIVERY CODES AS ONE-CHARACTER VARIABLES;
    length mdl1 $ 1
           mdl2 $ 1;

    set a.class01;

    *** CHARACTER GENDER STANDS IN FOR NUMERIC SEX, WHICH IS DROPPED BELOW;
    gender = put(sex, 1.);

    *** AGES OUTSIDE THE ACCEPTED RANGE BECOME MISSING;
    *** A MISSING AGE ALSO SATISFIES THE LOWER-BOUND TEST AND STAYS MISSING;
    if mage < 9 or mage > 61 then mage = .;
    if fage < 9 or fage > 65 then fage = .;

    *** ONE-CHARACTER COPIES OF THE TWO EDUCATION VARIABLES;
    fed = put(feduc, $1.);
    med = put(meduc, $1.);

    *** EDUCATION CODES OUTSIDE 1 THROUGH 5 BECOME BLANK;
    if fed < '1' or fed > '5' then fed = ' ';
    if med < '1' or med > '5' then med = ' ';

    *** NUMERIC GESTATION READ FROM GEST, MISSING WHEN OUTSIDE 126 TO 315;
    ges = input(gest, 3.);
    if ges < 126 or ges > 315 then ges = .;

    *** GESTATION DIVIDED BY 7 AND TRUNCATED TO AN INTEGER;
    gwk = int(ges / 7);

    *** NUMERIC BIRTH WEIGHT READ FROM BWGT, MISSING WHEN OUTSIDE 300 TO 7000;
    bwt = input(bwgt, 4.);
    if bwt < 300 or bwt > 7000 then bwt = .;

    *** A PRENATAL VISITS VALUE OF 99 IS BLANKED OUT;
    if pnv = '99' then pnv = ' ';

    *** SPLIT METHDEL INTO ITS FIRST AND SECOND CHARACTERS;
    *** EITHER CHARACTER OUTSIDE 1 THROUGH 8 BECOMES BLANK;
    mdl1 = substr(methdel, 1, 1);
    mdl2 = substr(methdel, 2, 1);
    if mdl1 < '1' or mdl1 > '8' then mdl1 = ' ';
    if mdl2 < '1' or mdl2 > '8' then mdl2 = ' ';

    *** ONE-CHARACTER LOC REPLACES WHERE, ANY VALUE OTHER THAN CITY OR OOC MAPS TO U;
    select (where);
        when ('city') loc = 'C';
        when ('ooc')  loc = 'O';
        otherwise     loc = 'U';
    end;

    *** DESCRIPTIVE LABELS;
    label
        bwt    = 'BIRTH WEIGHT'
        ges    = 'GESTATION (DAYS)'
        gwk    = 'GESTATION (WEEKS)'
        fed    = "FATHER'S EDUCATION"
        med    = "MOTHER'S EDUCATION"
        mage   = "MOTHER'S AGE"
        fage   = "FATHER'S AGE"
        live   = 'INFANT ALIVE'
        tpb    = 'TOTAL PREVIOUS BIRTHS'
        tplb   = 'TOTAL PREVIOUS LIVE BIRTHS'
        pnv    = 'PRENATAL VISITS'
        plur   = 'PLURALITY'
        loc    = 'WHERE BIRTH OCCURRED'
        mdl1   = 'METHOD OF DELIVERY-1'
        mdl2   = 'METHOD OF DELIVERY-2'
        apgar1 = '1-MINUTE APGAR SCORE'
        apgar5 = '5-MINUTE APGAR SCORE'
    ;

    *** THE SOURCE VARIABLES REPLACED ABOVE ARE NOT WRITTEN TO NEW;
    drop sex feduc meduc gest bwgt methdel where;
run;

*** SHOW VARIABLE NAMES IN UPPERCASE FROM HERE ON;
options validvarname = upcase;

*** DESCRIBE THE CLEANED DATA SET;
proc contents data = new;
run;

*** FORMATS USED TO GROUP VALUES OF THE CLEANED VARIABLES;
proc format;
    value $loc
        'C' = 'NEW YORK CITY'
        'O' = 'OUT-OF-STATE'
        'U' = 'UPSTATE'
    ;
    value bwt
        300 - 7000 = 'OK'
        other      = 'UNKNOWN'
    ;
    value ges
        126 - 315 = 'OK'
        other     = 'UNKNOWN'
    ;
    value $apgar
        '0' - '6'      = 'LOW'
        '7' - '9', 'X' = 'OK'
        other          = 'UNKNOWN'
    ;
run;

*** FORMATTED FREQUENCIES OF THE CLEANED DATA, MISSING VALUES COUNTED AS A LEVEL;
proc freq data = new;
    tables loc bwt ges apgar1 apgar5 / missing;
    format loc           $loc.
           bwt           bwt.
           ges           ges.
           apgar1 apgar5 $apgar.;
run;

*** PLOTS OF THE NUMERIC VARIABLES FROM THE PLOT OPTION;
*** ODS SELECT LIMITS THE OUTPUT TO THE OBJECT NAMED PLOTS;
proc univariate data = new plot;
    ods select plots;
    var ges gwk bwt mage fage;
run;