Wednesday, August 20, 2008

Proc SQL VS Datastep: SAS syntax

Combining/ Merging data in SAS/ SQL



1)DATASTEP SYNTAX

DATA set;
SET one two;
RUN;
PROC SQL SYNTAX:PROC SQL;
CREATE TABLE set AS
SELECT * FROM one
OUTER UNION
SELECT * FROM two;
QUIT;

2) DATA STEP SYNTAX

PROC SORT DATA=one;
BY patient;
RUN;

PROC SORT DATA=two;
BY patient;
RUN;

DATA both;
MERGE one (IN=A) two (IN=B);
BY patient;
IF A OR B;
RUN;
PROC SQL SYNTAX:PROC SQL;
CREATE TABLE Both AS
SELECT a.*, b.*
FROM one AS a FULL JOIN
two AS b
ON a.patient=b.patient;
QUIT;

3)DATA STEP SYNTAX

PROC SORT DATA=one;
BY patient;
RUN;
PROC SORT DATA=two;
BY patient;
RUN;
DATA both;
MERGE one (IN=A) two (IN=B);
BY patient;
IF A AND B;
RUN;
PROC SQL SYNTAXPROC SQL;
CREATE TABLE Both AS
SELECT a.*, b.*
FROM one AS a INNER JOIN
two AS b
ON a.patient=b.patient;
QUIT;

4)DATA STEP SYNTAX
DATA selvar1 ;
SET ex.admits (KEEP = pt_id admdate disdate);
RUN;

PROC SQL SYNTAX
PROC SQL;
CREATE TABLE selvar2 AS
SELECT pt_id, admdate, disdate
FROM ex.admits ;
QUIT;

5)DATA STEP SYNTAX

* Sort statements here *;
DATA ifa1;
MERGE one (IN=A) two (IN=B);
BY patient;
IF A ;
RUN;
DATA STEP SYNTAX
* Sort statements here *;
DATA ifb1;
MERGE one (IN=A) two (IN=B);
BY patient;
IF B;
RUN;

PROC SQL SYNTAXPROC SQL;
CREATE TABLE if a1 AS
SELECT a.*,
b.treatment
FROM one AS a LEFT JOIN
two AS b
ON a.patient=b.patient;
QUIT;

PROC SQL SYNTAXPROC SQL;
CREATE TABLE ifb1 AS
SELECT a.age,
a.sex,
b.*
FROM one AS a RIGHT JOIN
two AS b
ON a.patient=b.patient;
QUIT;


6)DATA STEP SYNTAX

DATA mi1 ;
SET ex.admits ;
IF primdx EQ: ’410’ ;
RUN;
PROC SQL SYNTAXPROC SQL;
CREATE TABLE mi2 AS
SELECT *
FROM ex.admits
WHERE primdx LIKE ’410%’ ;
QUIT;

7)DATA STEP SYNTAX
DATA vahosp1 ;
SET ex.admits ;
IF hosp EQ 3 ;
RUN;

PROC SQL SYNTAX
PROC SQL FEEDBACK;
CREATE TABLE vahosp2 AS
SELECT *
FROM ex.admits
WHERE hosp EQ 3;
QUIT;


8)DATA STEP SYNTAX

DATA selmd1 ;
SET ex.doctors (KEEP = md_id lastname hospadm
RENAME = (hospadm=hospital));
BY md_id ;
IF NOT (FIRST.md_id AND LAST.md_id) ;
RUN;
PROC SORT DATA=selmd1 ;
BY lastname ;
RUN;

PROC SQL SYNTAX
PROC SQL ;
CREATE TABLE selmd2 AS
SELECT md_id, lastname,
hospadm AS hospital
FROM ex.doctors
GROUP BY md_id
HAVING COUNT(*) GE 2
ORDER BY lastname ;
QUIT;



9)DATA STEP SYNTAX

DATA twowks1 ;
SET ex.admits (KEEP = pt_id hosp admdate
disdate) ;
los = (disdate - admdate) + 1;
ATTRIB los LENGTH=4 LABEL=’Length of Stay’;
IF los GE 14 ;
RUN;

PROC SQL SYNTAX
PROC SQL;
CREATE TABLE twowks2 AS
SELECT pt_id, hosp, admdate, disdate,
(disdate - admdate) + 1 AS los
LENGTH=4 LABEL=’Length of Stay’
FROM ex.admits
WHERE CALCULATED los GE 14;
Quit;


10)DATA STEP SYNTAX

DATA prim1 (DROP = primmd);
MERGE ex.admits (IN=adm KEEP = pt_id
admdate disdate hosp md)
ex.patients (IN=pts KEEP = id lastname
primmd RENAME=(id=pt_id));
BY pt_id ;
IF adm AND pts AND (md EQ primmd) ;
RUN;

PROC SORT DATA=prim1; BY md; RUN;
DATA doctors ;
SET ex.doctors (KEEP = md_id lastname);
BY md_id ;
IF FIRST.md_id ;
RUN;

DATA prim1a ;
MERGE prim1 (IN=p RENAME=(lastname=ptname
md=md_id))
doctors (RENAME = (lastname=mdname));
BY md_id ;
IF p ;
RUN;

PROC SQL SYNTAX
Proc Sql;
CREATE TABLE prim2 AS
SELECT pt_id, admdate, disdate, hosp, md_id,
b.lastname AS ptname,
c.lastname AS mdname
FROM ex.admits a, ex.patients b,
(SELECT DISTINCT md_id, lastname
FROM ex.doctors) c
WHERE (a.pt_id EQ b.id) AND
(a.md EQ b.primmd) AND
(a.md EQ c.md_id)
ORDER BY a.pt_id, admdate ;
QUIT;



11)DATA STEP SYNTAX

PROC SORT DATA = ex.admits (KEEP = hosp)
OUT=admits RENAME=(hosp=hosp_id)) NODUPKEY;
BY hosp ;
RUN;

DATA HOSPS1 ;
MERGE ex.hospital (IN=hosp)
admits (IN=adm);
BY hosp_id ;
IF hosp ;
hasadmit = adm ;
RUN;
PROC SQL SYNTAXPROC SQL ;
CREATE TABLE hosps2 AS
SELECT DISTINCT a.*,
hosp IS NOT NULL AS hasadmit
FROM ex.hospital a LEFT JOIN ex.admits b
ON a.hosp_id = b.hosp ;
QUIT;



12)DATA STEP SYNTAX
DATA admits1 ;
MERGE ex.admits (IN=adm KEEP = pt_id admdate
disdate hosp md)
ex.patients (IN=pts KEEP = id lastname
sex primmd RENAME = (id=pt_id));
BY pt_id ;
IF adm AND pts;
RUN;
PROC SQL SYNTAXPROC SQL ;
CREATE TABLE admits2 AS
SELECT pt_id, admdate, disdate, hosp, md,
lastname, sex, primmd
FROM ex.admits AS a, ex.patients AS b
WHERE a.pt_id = b.id
ORDER BY a.pt_id, admdate ;
Quit;


13)DATA STEP SYNTAX
PROC SORT DATA = ex.admits (KEEP=md hosp)
OUT = admits;
BY md;
RUN;

PROC SORT DATA = ex.doctors OUT=doctors
NODUPKEY ;
BY md_id ;
RUN;

DATA vadocs1 (DROP = hosp);
MERGE doctors (IN=docs KEEP=md_id lastname)
admits (IN=adm WHERE=(hosp = 3)
RENAME = (md=md_id)) ;
BY md_id;
IF docs AND adm AND FIRST.md_id ;
RUN;

PROC SORT; BY lastname; RUN;

PROC SQL SYNTAX
PROC SQL;
CREATE TABLE vadocs2 AS
SELECT DISTINCT md_id, lastname
FROM ex.doctors AS d
WHERE 3 IN
(SELECT hosp
FROM ex.admits AS a
WHERE d.md_id = a.md)
ORDER BY lastname;
QUIT;


14)DATA STEP SYNTAXDATA step:
DATA table3;
SET table1 table2;
Run;

PROC SQL SYNTAX
PROC SQL;
CREATE TABLE table3 AS
SELECT *
FROM table1
OUTER UNION CORRESPONDING
SELECT *
FROM table2;
QUIT;

15)DATA STEP SYNTAX

DATA table3;
MERGE table1 (IN=l)
Table2 (IN=r);
BY keyvar;
IF l AND r; *** inner join ;
PROC SQL SYNTAX
PROC SQL;
CREATE TABLE table3 AS
SELECT *
FROM table1 AS l
INNER JOIN table2 AS r
ON l.keyvar=r.keyvar;
QUIT;


16)DATA STEP SYNTAX
DATA table2;
SET table1(WHERE=(var1=value1));
DATA table4;
SET table3
IF var1=value1 AND
var2 IN (value-list ); run;

DATA table7;
MERGE table5 table6;
BY var1;
IF M0D(var4,3) NE 0 THEN DELETE;
PROC SQL SYNTAX
PROC SQL;
CREATE TABLE table3 AS
SELECT var1, var2, var3, var4
FROM table1 AS a
, table2 AS b
WHERE a.var1=b.var1
AND a.var1 IN (value-list);
QUIT;
PROC SQL SYNTAXPROC SQL;
CREATE TABLE table4 AS
SELECT *
FROM table3;
DELETE
FROM table4
WHERE var1 IS MISSING;
Quit;



source: www2.sas.com , www.nesug.org, www.ats.ucla.com


How can I get a new column like _N_ in my SAS table using PROC SQL ... -

3 comments:

Anonymous said...

I was waiting for the pros and cons by using datastep and proc sql snif snif

Unknown said...

please explain in detail. sonme theoratical differences in using proc sql and datastep. merits n demerits

sarath said...

Proc SQL has many advantages associated with it when compared to the SAS data step.

Most of the times Proc SQL does the same task as the data steps but it require less code, and it is generally easy to maintain than long data step code. Beyond this, SQL uses less computer resources than other SAS Procedures available.

Proc SQL doesn’t need any presorting of the variables unlike data step, which does.

Proc SQL Does Cartesian joins whereas the simple data step merge doesn’t. This difference is important when you deal with many-to-many merges.

Even thought Proc SQL does have some cons… but the Pros overweigh the Cons…

Post a Comment

ShareThis