Add New Records Within By Groups Using Aggregation Functions
Source:R/derive_summary_records.R
derive_summary_records.Rd
It is not uncommon to have an analysis need whereby one needs to derive an
analysis value (AVAL) from multiple records. The ADaM basic dataset
structure variable DTYPE is available to indicate when a new derived
record has been added to a dataset.
Usage
derive_summary_records(
dataset,
by_vars,
filter = NULL,
analysis_var,
summary_fun,
set_values_to = NULL
)
Arguments
- dataset
A data frame.
- by_vars
Variables to consider for generation of groupwise summary records. Providing the names of variables in exprs() will create a groupwise summary and generate summary records for the specified groups.
- filter
Filter condition as logical expression to apply during summary calculation. By default, filtering expressions are computed within by_vars, as this will help when an aggregating, lagging, or ranking function is involved.
For example,
filter = (AVAL > mean(AVAL, na.rm = TRUE)) will filter (i.e., keep) all AVAL values greater than the mean of AVAL within by_vars.
filter = (dplyr::n() > 2) will filter (i.e., keep) only the by_vars groups with a record count greater than 2.
- analysis_var
Analysis variable.
- summary_fun
Function that takes as an input the analysis_var and performs the calculation. This can include built-in functions as well as user defined functions, for example mean or function(x) mean(x, na.rm = TRUE).
- set_values_to
Variables to be set.
The specified variables are set to the specified values for the new observations.
A list of variable name-value pairs is expected.
LHS refers to a variable.
RHS refers to the values to set to the variable. This can be a string, a symbol, a numeric value or NA, e.g., exprs(PARAMCD = "TDOSE", PARCAT1 = "OVERALL"). More general expressions are not allowed.
Details
When all records have the same values within by_vars, this function will
retain those common values in the newly derived records. Otherwise, the new
value will be set to NA.
See also
BDS-Findings Functions for adding Parameters/Records:
default_qtc_paramcd(), derive_expected_records(), derive_extreme_event(),
derive_extreme_records(), derive_locf_records(), derive_param_bmi(),
derive_param_bsa(), derive_param_computed(), derive_param_doseint(),
derive_param_exist_flag(), derive_param_exposure(),
derive_param_extreme_event(), derive_param_framingham(), derive_param_map(),
derive_param_qtc(), derive_param_rr(), derive_param_wbc_abs()
Examples
library(tibble)
library(dplyr, warn.conflicts = TRUE)
adeg <- tribble(
~USUBJID, ~EGSEQ, ~PARAM, ~AVISIT, ~EGDTC, ~AVAL, ~TRTA,
"XYZ-1001", 1, "QTcF Int. (msec)", "Baseline", "2016-02-24T07:50", 385, "",
"XYZ-1001", 2, "QTcF Int. (msec)", "Baseline", "2016-02-24T07:52", 399, "",
"XYZ-1001", 3, "QTcF Int. (msec)", "Baseline", "2016-02-24T07:56", 396, "",
"XYZ-1001", 4, "QTcF Int. (msec)", "Visit 2", "2016-03-08T09:45", 384, "Placebo",
"XYZ-1001", 5, "QTcF Int. (msec)", "Visit 2", "2016-03-08T09:48", 393, "Placebo",
"XYZ-1001", 6, "QTcF Int. (msec)", "Visit 2", "2016-03-08T09:51", 388, "Placebo",
"XYZ-1001", 7, "QTcF Int. (msec)", "Visit 3", "2016-03-22T10:45", 385, "Placebo",
"XYZ-1001", 8, "QTcF Int. (msec)", "Visit 3", "2016-03-22T10:48", 394, "Placebo",
"XYZ-1001", 9, "QTcF Int. (msec)", "Visit 3", "2016-03-22T10:51", 402, "Placebo",
"XYZ-1002", 1, "QTcF Int. (msec)", "Baseline", "2016-02-22T07:58", 399, "",
"XYZ-1002", 2, "QTcF Int. (msec)", "Baseline", "2016-02-22T07:58", 410, "",
"XYZ-1002", 3, "QTcF Int. (msec)", "Baseline", "2016-02-22T08:01", 392, "",
"XYZ-1002", 4, "QTcF Int. (msec)", "Visit 2", "2016-03-06T09:50", 401, "Active 20mg",
"XYZ-1002", 5, "QTcF Int. (msec)", "Visit 2", "2016-03-06T09:53", 407, "Active 20mg",
"XYZ-1002", 6, "QTcF Int. (msec)", "Visit 2", "2016-03-06T09:56", 400, "Active 20mg",
"XYZ-1002", 7, "QTcF Int. (msec)", "Visit 3", "2016-03-24T10:50", 412, "Active 20mg",
"XYZ-1002", 8, "QTcF Int. (msec)", "Visit 3", "2016-03-24T10:53", 414, "Active 20mg",
"XYZ-1002", 9, "QTcF Int. (msec)", "Visit 3", "2016-03-24T10:56", 402, "Active 20mg",
)
# Summarize the average of the triplicate ECG interval values (AVAL)
derive_summary_records(
adeg,
by_vars = exprs(USUBJID, PARAM, AVISIT),
analysis_var = AVAL,
summary_fun = function(x) mean(x, na.rm = TRUE),
set_values_to = exprs(DTYPE = "AVERAGE")
)
#> # A tibble: 24 x 8
#> USUBJID EGSEQ PARAM AVISIT EGDTC AVAL TRTA DTYPE
#> <chr> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
#> 1 XYZ-1001 1 QTcF Int. (msec) Baseline 2016-02-24T07:… 385 "" NA
#> 2 XYZ-1001 2 QTcF Int. (msec) Baseline 2016-02-24T07:… 399 "" NA
#> 3 XYZ-1001 3 QTcF Int. (msec) Baseline 2016-02-24T07:… 396 "" NA
#> 4 XYZ-1001 4 QTcF Int. (msec) Visit 2 2016-03-08T09:… 384 "Placeb… NA
#> 5 XYZ-1001 5 QTcF Int. (msec) Visit 2 2016-03-08T09:… 393 "Placeb… NA
#> 6 XYZ-1001 6 QTcF Int. (msec) Visit 2 2016-03-08T09:… 388 "Placeb… NA
#> 7 XYZ-1001 7 QTcF Int. (msec) Visit 3 2016-03-22T10:… 385 "Placeb… NA
#> 8 XYZ-1001 8 QTcF Int. (msec) Visit 3 2016-03-22T10:… 394 "Placeb… NA
#> 9 XYZ-1001 9 QTcF Int. (msec) Visit 3 2016-03-22T10:… 402 "Placeb… NA
#> 10 XYZ-1002 1 QTcF Int. (msec) Baseline 2016-02-22T07:… 399 "" NA
#> # … with 14 more rows
advs <- tribble(
~USUBJID, ~VSSEQ, ~PARAM, ~AVAL, ~VSSTRESU, ~VISIT, ~VSDTC,
"XYZ-001-001", 1164, "Weight", 99, "kg", "Screening", "2018-03-19",
"XYZ-001-001", 1165, "Weight", 101, "kg", "Run-In", "2018-03-26",
"XYZ-001-001", 1166, "Weight", 100, "kg", "Baseline", "2018-04-16",
"XYZ-001-001", 1167, "Weight", 94, "kg", "Week 24", "2018-09-30",
"XYZ-001-001", 1168, "Weight", 92, "kg", "Week 48", "2019-03-17",
"XYZ-001-001", 1169, "Weight", 95, "kg", "Week 52", "2019-04-14",
)
# Set new values to any variable. Here, `DTYPE = MAXIMUM` refers to `max()` records
# and `DTYPE = AVERAGE` refers to `mean()` records.
derive_summary_records(
advs,
by_vars = exprs(USUBJID, PARAM),
analysis_var = AVAL,
summary_fun = max,
set_values_to = exprs(DTYPE = "MAXIMUM")
) %>%
derive_summary_records(
by_vars = exprs(USUBJID, PARAM),
analysis_var = AVAL,
summary_fun = mean,
set_values_to = exprs(DTYPE = "AVERAGE")
)
#> # A tibble: 8 x 8
#> USUBJID VSSEQ PARAM AVAL VSSTRESU VISIT VSDTC DTYPE
#> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <chr>
#> 1 XYZ-001-001 1164 Weight 99 kg Screening 2018-03-19 NA
#> 2 XYZ-001-001 1165 Weight 101 kg Run-In 2018-03-26 NA
#> 3 XYZ-001-001 1166 Weight 100 kg Baseline 2018-04-16 NA
#> 4 XYZ-001-001 1167 Weight 94 kg Week 24 2018-09-30 NA
#> 5 XYZ-001-001 1168 Weight 92 kg Week 48 2019-03-17 NA
#> 6 XYZ-001-001 1169 Weight 95 kg Week 52 2019-04-14 NA
#> 7 XYZ-001-001 NA Weight 101 NA NA NA MAXIMUM
#> 8 XYZ-001-001 NA Weight 97.4 NA NA NA AVERAGE
# Sample ADEG dataset with triplicate record for only AVISIT = 'Baseline'
adeg <- tribble(
~USUBJID, ~EGSEQ, ~PARAM, ~AVISIT, ~EGDTC, ~AVAL, ~TRTA,
"XYZ-1001", 1, "QTcF Int. (msec)", "Baseline", "2016-02-24T07:50", 385, "",
"XYZ-1001", 2, "QTcF Int. (msec)", "Baseline", "2016-02-24T07:52", 399, "",
"XYZ-1001", 3, "QTcF Int. (msec)", "Baseline", "2016-02-24T07:56", 396, "",
"XYZ-1001", 4, "QTcF Int. (msec)", "Visit 2", "2016-03-08T09:48", 393, "Placebo",
"XYZ-1001", 5, "QTcF Int. (msec)", "Visit 2", "2016-03-08T09:51", 388, "Placebo",
"XYZ-1001", 6, "QTcF Int. (msec)", "Visit 3", "2016-03-22T10:48", 394, "Placebo",
"XYZ-1001", 7, "QTcF Int. (msec)", "Visit 3", "2016-03-22T10:51", 402, "Placebo",
"XYZ-1002", 1, "QTcF Int. (msec)", "Baseline", "2016-02-22T07:58", 399, "",
"XYZ-1002", 2, "QTcF Int. (msec)", "Baseline", "2016-02-22T07:58", 410, "",
"XYZ-1002", 3, "QTcF Int. (msec)", "Baseline", "2016-02-22T08:01", 392, "",
"XYZ-1002", 4, "QTcF Int. (msec)", "Visit 2", "2016-03-06T09:53", 407, "Active 20mg",
"XYZ-1002", 5, "QTcF Int. (msec)", "Visit 2", "2016-03-06T09:56", 400, "Active 20mg",
"XYZ-1002", 6, "QTcF Int. (msec)", "Visit 3", "2016-03-24T10:53", 414, "Active 20mg",
"XYZ-1002", 7, "QTcF Int. (msec)", "Visit 3", "2016-03-24T10:56", 402, "Active 20mg",
)
# Compute the average of AVAL only if there are more than 2 records within the
# by group
derive_summary_records(
adeg,
by_vars = exprs(USUBJID, PARAM, AVISIT),
filter = n() > 2,
analysis_var = AVAL,
summary_fun = function(x) mean(x, na.rm = TRUE),
set_values_to = exprs(DTYPE = "AVERAGE")
)
#> # A tibble: 16 x 8
#> USUBJID EGSEQ PARAM AVISIT EGDTC AVAL TRTA DTYPE
#> <chr> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr>
#> 1 XYZ-1001 1 QTcF Int. (ms… Baseline 2016-02-24T0… 385 "" NA
#> 2 XYZ-1001 2 QTcF Int. (ms… Baseline 2016-02-24T0… 399 "" NA
#> 3 XYZ-1001 3 QTcF Int. (ms… Baseline 2016-02-24T0… 396 "" NA
#> 4 XYZ-1001 4 QTcF Int. (ms… Visit 2 2016-03-08T0… 393 "Placebo" NA
#> 5 XYZ-1001 5 QTcF Int. (ms… Visit 2 2016-03-08T0… 388 "Placebo" NA
#> 6 XYZ-1001 6 QTcF Int. (ms… Visit 3 2016-03-22T1… 394 "Placebo" NA
#> 7 XYZ-1001 7 QTcF Int. (ms… Visit 3 2016-03-22T1… 402 "Placebo" NA
#> 8 XYZ-1002 1 QTcF Int. (ms… Baseline 2016-02-22T0… 399 "" NA
#> 9 XYZ-1002 2 QTcF Int. (ms… Baseline 2016-02-22T0… 410 "" NA
#> 10 XYZ-1002 3 QTcF Int. (ms… Baseline 2016-02-22T0… 392 "" NA
#> 11 XYZ-1002 4 QTcF Int. (ms… Visit 2 2016-03-06T0… 407 "Active 20… NA
#> 12 XYZ-1002 5 QTcF Int. (ms… Visit 2 2016-03-06T0… 400 "Active 20… NA
#> 13 XYZ-1002 6 QTcF Int. (ms… Visit 3 2016-03-24T1… 414 "Active 20… NA
#> 14 XYZ-1002 7 QTcF Int. (ms… Visit 3 2016-03-24T1… 402 "Active 20… NA
#> 15 XYZ-1001 NA QTcF Int. (ms… Baseline NA 393. NA AVERA…
#> 16 XYZ-1002 NA QTcF Int. (ms… Baseline NA 400. NA AVERA…