Programming Examples
		1984 data
		1994 data
		1994 Moved HH data
		1994 Migrant data
		2000 data
		2000 Moved HH data
		2000 Migrant data

You are here: Home > Data > Identifiers > Person Identifier > Programming Examples > 2000 data

Add the Person Identifier (NRPID) to the 2000 data

SAS Program (go to SAS Log)

******************************************************************************
Attach NRPID to the 2000 Individual-Level Data File
   1. Select ONLY 2000 Individuals in the PERSONID Data File
   2. Restructure WORK.PERSON00 into "Child" File, 
      as 2511 Code 2 Individuals are in data TWICE
   3. Match WORK.SPERSON00B to the INDIV00 Data File

Input data: /nangrong/personid.X01
            /nangrong/2000/indiv00.03
*****************************************************************************;

*** Assign librefs to the two SAS transport (XPORT engine) input files.
    The INDIV00 file is assigned to IN6 because step 3 reads it as
    IN6.INDIV00 - with the former IN2 libref that merge would fail with
    an unassigned-libref error ***;

libname in1 xport '/nangrong/personid.X01';
libname in6 xport '/nangrong/2000/indiv00.03';

*********************************************************
*  Attach NRPID to the 2000 Individual-Level Data File  *
*********************************************************;

* Step 1 - Keep ONLY the 2000 individuals from the PERSONID data file
  (records with a nonblank HHID00) and write the 2000 identifiers out
  under the names DHHID00 and DCEP00 *
----------------------------------------------------------------------;
data person00(rename=(HHID00=DHHID00 CEP00=DCEP00));
  set in1.personid(keep=HHID00 CEP00 OHHID00 OCEP00 NRPID);

  * The rename takes effect only when the record is written, so the
    original name HHID00 is still the one to test here *;
  if HHID00 = ' ' then delete;
run;

* Step 2 - Restructure PERSON00 into a Child file, as 2511 Code 2
  individuals are in the data TWICE. Each input record is written once
  for every nonblank household ID among DHHID00 and OHHID00, with the
  paired DCEP00 or OCEP00 value copied into CEP00 *
--------------------------------------------------------------------;
data person00b(keep=HHID00 CEP00 NRPID);
  set person00;
  length HHID00 $ 9 CEP00 $ 2;

  array hh_ids  {2} DHHID00 OHHID00;
  array cep_ids {2} DCEP00  OCEP00;

  * Walk the two identifier pairs in order - the loop counter is not
    in the KEEP= list, so it never reaches the output data set *;
  do slot = 1 to dim(hh_ids);
    HHID00 = hh_ids{slot};
    CEP00  = cep_ids{slot};
    if not missing(HHID00) then output;
  end;
run;

*** Order PERSON00B by HHID00 CEP00 into SPERSON00B for the BY-merge
    that follows. NODUPKEY drops any record that repeats a BY key ***;

proc sort nodupkey
          data=person00b
          out=sperson00b;
  by HHID00 CEP00;
run;

* Step 3 - Match SPERSON00B to the INDIV00 data file by HHID00 CEP00
  and route each merged record to one of three data sets -
    INDIV00_NRPID   - key found in both inputs
    NOTIN_INDIV00   - key found only in SPERSON00B
    NOTIN_PERSON00A - key found only in INDIV00 *
-----------------------------------------------------------------------;
data indiv00_nrpid notin_indiv00 notin_person00a;
  merge sperson00b(in=from_person)
        in6.indiv00(in=from_indiv);
  by HHID00 CEP00;

  * Every merged record comes from at least one input, so exactly one
    branch below fires for each record *;
  if from_person and from_indiv then output indiv00_nrpid;
  else if from_person then output notin_indiv00;
  else if from_indiv then output notin_person00a;
run;

*** Duplicate check on NRPID in INDIV00_NRPID - the NODUPKEY sort
    SHOULD report 2511 duplicate key values deleted ***;

proc sort nodupkey
          data=indiv00_nrpid
          out=sindiv00_nrpid;
  by NRPID;
run;

SAS Log (go back to SAS Program)

271        *********************************************************
272        *  Attach NRPID to the 2000 Individual-Level Data File  *
273        *********************************************************;
274        
275        * 1. Select ONLY 2000 Individuals in the PERSONID Data File *
276        --------------------------------------------------------------;
277        data person00;
278          set in1.personid(keep=HHID00 CEP00 OHHID00 OCEP00 NRPID);
279        
280          if (HHID00 ne ' ');
281        
282        *** Rename Identifiers ***;
283        
284        rename
285        HHID00=DHHID00
286        CEP00=DCEP00
287        ;
288        
289        run;

NOTE: There were 57416 observations read from the data set IN1.PERSONID.
NOTE: The data set WORK.PERSON00 has 53093 observations and 5 variables.
NOTE: DATA statement used:
      real time           0.94 seconds
      cpu time            0.90 seconds
      

290        
291        * 2. Restructure PERSON00 into "Child" File,
                as 2511 Code 2 Individuals are in data TWICE *
292        ----------------------------------------------------;
293        data person00b;
294          set person00;
295          length HHID00 $ 9 CEP00 $ 2;
296        
297          keep HHID00 CEP00 NRPID;
298        
299          array i3 {2} DHHID00 OHHID00;
300          array i4 {2} DCEP00 OCEP00;
301        
302          do i=1 to 2;
303           HHID00=i3{i};
304           CEP00=i4{i};
305           if HHID00 ne ' ' then output;
306          end;
307        
308        run;

NOTE: There were 53093 observations read from the data set WORK.PERSON00.
NOTE: The data set WORK.PERSON00B has 55604 observations and 3 variables.
NOTE: DATA statement used:
      real time           0.62 seconds
      cpu time            0.59 seconds
      

309        
310        *** Sort PERSON00B by HHID00 CEP00 ***;
311        
312        proc sort data=person00b out=sperson00b nodupkey;
313          by HHID00 CEP00;
314        run;

NOTE: 0 observations with duplicate key values were deleted.
NOTE: There were 55604 observations read from the data set WORK.PERSON00B.
NOTE: The data set WORK.SPERSON00B has 55604 observations and 3 variables.
NOTE: PROCEDURE SORT used:
      real time           0.71 seconds
      cpu time            0.68 seconds
      

315        
316        * 3. Match SPERSON00B to the INDIV00 Data File *
317        -------------------------------------------------;
318        data indiv00_nrpid notin_indiv00 notin_person00a;
319          merge sperson00b(in=a)
320                in6.indiv00(in=b);
321          by HHID00 CEP00;
322        
323          if a=1 and b=1 then output indiv00_nrpid;
324          if a=1 and b=0 then output notin_indiv00;
325          if a=0 and b=1 then output notin_person00a;
326        
327        run;

NOTE: There were 55604 observations read from the data set WORK.SPERSON00B.
NOTE: There were 51924 observations read from the data set IN6.INDIV00.
NOTE: The data set WORK.INDIV00_NRPID has 51924 observations and 78 variables.
NOTE: The data set WORK.NOTIN_INDIV00 has 3680 observations and 78 variables.
NOTE: The data set WORK.NOTIN_PERSON00A has 0 observations and 78 variables.
NOTE: DATA statement used:
      real time           12.55 seconds
      cpu time            12.41 seconds
      

328        
329        *** Check for Duplicates on NRPID in INDIV00_NRPID (SHOULD HAVE 2511!) ***;
330        
331        proc sort data=indiv00_nrpid out=sindiv00_nrpid nodupkey;
332          by NRPID;
333        run;

NOTE: 2511 observations with duplicate key values were deleted.
NOTE: There were 51924 observations read from the data set WORK.INDIV00_NRPID.
NOTE: The data set WORK.SINDIV00_NRPID has 49413 observations and 78 variables.
NOTE: PROCEDURE SORT used:
      real time           4.26 seconds

Last Modified: 02/16/2005