CDISC準拠のADaMデータの解析プログラムの紹介
臨床試験のデータ解析をSASで行う例
有害事象の集計や臨検値の集計は、ほとんど同じ
rtfやexcel出力まではやらないが、出力直前のデータまでをなんとかSQLでやってみる
そうすればRやpythonなど他の言語でも再現しやすい
有害事象の集計については以前書いた
https://qiita.com/saspy/items/b87e1917f52534456b36
CDISC知らない人への補足
CDISC準拠のADaM.ADSLとADaM.ADLBから臨検値の要約統計量を算出
臨検値の項目ごと、検査時点ごとに集計(PARAM/AVISIT)
ADSLは1人1行で、なんの薬飲んだかとかその人自身の情報が入ってる
ADLBは、臨検値のデータ
ASTがいくつとか、健康診断で見るようなものが、
臨検値の項目ごと、検査時点ごとに縦に入ってるデータ
SDTM LBとかでググればわかりやすい
(SDTMのLBを解析用に整えたデータがADLB)
臨検値の要約統計量の算出やデータ加工をすべてSQLでやってみた!
SAFFLとANL01FLを抽出条件、それ以外を使う場合はマクロ変数Whereに指定
例えば臨検値の項目をしぼるのに使用
libname
libname adam "ADaMのデータセットのフォルダパス" access = readonly;
臨検値マクロ
%macro LB_Macro(Where = %str());
%* 投与群 *;
proc Sql noprint;
select min( TRT01AN ) into : Min_N trimmed from adam.ADSL where SAFFL = "Y";
select max( TRT01AN ) into : Max_N trimmed from adam.ADSL where SAFFL = "Y";
quit;
%let TRT_N = %sysevalf( &Max_N. - &Min_N. + 1 );
%* ADLBデータ *;
%let Group = PARAMN , PARAM , AVISITN , AVISIT , TRTAN , TRTA;
proc Sql;
%* 四捨五入用に小数部分の最大桁数を取得 *;
create table work.ADLB as
select
STUDYID , SUBJID , &Group. , AVAL , AVALC , CHG , ABLFL ,
max( max( lengthn( scan( AVALC , 2 , "." ) ) ) , 0 ) as MAX_D
from adam.ADLB( where = ( ANL01FL = "Y" and SAFFL = "Y" %if %length( &Where. ) > 0 %then and &Where.; ) )
group by PARAMN , PARAM
order by &Group.;
create table work.BASE as
select distinct PARAMN , PARAM , AVISITN , AVISIT
from work.ADLB( where = ( ABLFL = "Y" ) );
quit;
%* 集計 ;
proc Sql;
create table work.STAT as
select * , 1 as CATEGORY
from
( select
&Group. , MAX_D ,
put( n( AVAL ) , best. -L ) as AVAL ,
put( n( CHG ) , best. -L ) as CHG
from work.ADLB
group by &Group. , MAX_D
)
outer union corr
select * , 2 as CATEGORY
from
( select
&Group. , MAX_D ,
cats( putn( round( mean( AVAL ) , 10 * ( - ( MAX_D + 1 ) ) ) , cats( "15." , MAX_D + 1 ) ) )
|| " (" ||
cats( putn( round( std( AVAL ) , 10 ** ( - ( MAX_D + 1 ) ) ) , cats( "15." , MAX_D + 1 ) ) )
|| ")" as AVAL ,
cats( putn( round( mean( CHG ) , 10 ** ( - ( MAX_D + 1 ) ) ) , cats( "15." , MAX_D + 1 ) ) )
|| " (" ||
cats( putn( round( std( CHG ) , 10 ** ( - ( MAX_D + 1 ) ) ) , cats( "15." , MAX_D + 1 ) ) )
|| ")" as CHG
from work.ADLB
group by &Group. , MAX_D
)
outer union corr
select * , 3 as CATEGORY
from
( select
&Group. , MAX_D ,
cats( putn( round( median( AVAL ) , 10 ** ( - ( MAX_D + 1 ) ) ) , cats( "15." , MAX_D + 1 ) ) ) as AVAL ,
cats( putn( round( median( CHG ) , 10 ** ( - ( MAX_D + 1 ) ) ) , cats( "15." , MAX_D + 1 ) ) ) as CHG
from work.ADLB
group by &Group. , MAX_D
)
outer union corr
select * , 4 as CATEGORY
from
( select
&Group. , MAX_D ,
cats( putn( round( min( AVAL ) , 10 ** ( - MAX_D ) ) , cats( "15." , MAX_D ) ) )
|| ", " ||
cats( putn( round( max( AVAL ) , 10 ** ( - MAX_D ) ) , cats( "15." , MAX_D ) ) ) as AVAL ,
cats( putn( round( min( CHG ) , 10 ** ( - MAX_D ) ) , cats( "15." , MAX_D ) ) )
|| ", " ||
cats( putn( round( max( CHG ) , 10 ** ( - MAX_D ) ) , cats( "15." , MAX_D ) ) ) as CHG
from work.ADLB
group by &Group. , MAX_D
)
;
quit;
%* Baseline時点のChangeの値を削除 *;
proc Sql;
create table work.STAT2 as
select
A.PARAMN , A.PARAM , A.AVISITN , A.AVISIT , A.CATEGORY , A.TRTAN , A.TRTA ,
A.AVAL ,
case when B.AVISITN ^= . then "-" else CHG end as CHG ,
case CATEGORY when 1 then "n" when 2 then "Mean (SD)" when 3 then "Median" when 4 then "Min, Max" else "" end as KOUMOKU
from work.STAT as A
left join work.BASE as B
on A.PARAMN = B.PARAMN and A.AVISITN = B.AVISITN
order by A.PARAMN , A.PARAM , A.AVISITN , A.AVISIT , A.CATEGORY , A.TRTAN , A.TRTA;
quit;
%* LB 集計結果まとめ *;
proc Sql;
create table work.STAT_RESULT as
select
PARAMN , PARAM , AVISITN , AVISIT , CATEGORY , KOUMOKU
%do i = &Min_N. %to &Max_N. ;
, max( case when TRTAN = &i. then AVAL else "-" end ) as AVAL&i. length = 50
, max( case when TRTAN = &i. then CHG else "-" end ) as CHG&i. length = 50
%end ;
from work.STAT2
group by PARAMN , PARAM , AVISITN , AVISIT , CATEGORY , KOUMOKU
order by PARAMN , PARAM , AVISITN , AVISIT , CATEGORY , KOUMOKU;
quit;
%mend;
SQLプロシジャのmedianは、SAS9.4かららしいので、それ以前のバージョンだとうまくいかない(SAS関数とみなされる)
出力桁については
平均値、中央値、標準偏差は、データ内の表示桁+1
最大値と最小値はデータ内の表示桁と同じもの