Add a Variable Flagging the First or Last Observation Within Each By Group
Source:R/derive_var_extreme_flag.R
derive_var_extreme_flag.Rd
Add a variable flagging the first or last observation within each by group
Arguments
- dataset
Input dataset
The variables specified by the
by_vars
parameter are expected.- by_vars
Grouping variables
Permitted Values: list of variables
- order
Sort order
The first or last observation is determined with respect to the specified order.
Permitted Values: list of variables or functions of variables
- new_var
Variable to add
The specified variable is added to the output dataset. It is set to
"Y"
for the first or last observation (depending on the mode) of each by group.Permitted Values: list of name-value pairs
- mode
Flag mode
Determines of the first or last observation is flagged.
Permitted Values:
"first"
,"last"
- check_type
Check uniqueness?
If
"warning"
or"error"
is specified, the specified message is issued if the observations of the input dataset are not unique with respect to the by variables and the order.Default:
"warning"
Permitted Values:
"none"
,"warning"
,"error"
Details
For each group (with respect to the variables specified for the
by_vars
parameter), new_var
is set to "Y" for the first or last observation
(with respect to the order specified for the order
parameter and the flag mode
specified for the mode
parameter). Only observations included by the filter
parameter
are considered for flagging.
Otherwise, new_var
is set to NA
. Thus, the direction of "worst" is considered fixed for
all parameters in the dataset depending on the order
and the mode
, i.e. for every
parameter the first or last record will be flagged across the whole dataset.
See also
General Derivation Functions for all ADaMs that returns variable appended to dataset:
derive_var_joined_exist_flag()
,
derive_var_last_dose_amt()
,
derive_var_last_dose_date()
,
derive_var_last_dose_grp()
,
derive_var_merged_cat()
,
derive_var_merged_character()
,
derive_var_merged_exist_flag()
,
derive_var_merged_summary()
,
derive_var_obs_number()
,
derive_var_relative_flag()
,
derive_vars_joined()
,
derive_vars_last_dose()
,
derive_vars_merged_lookup()
,
derive_vars_merged()
,
derive_vars_transposed()
,
get_summary_records()
Examples
library(tibble)
library(dplyr, warn.conflicts = FALSE)
example_vs <- tribble(
~USUBJID, ~VSTESTCD, ~VISIT, ~VISITNUM, ~VSTPTNUM, ~VSSTRESN,
"1001", "DIABP", "SCREENING", 1, 10, 64,
"1001", "DIABP", "SCREENING", 1, 11, 66,
"1001", "DIABP", "BASELINE", 2, 100, 68,
"1001", "DIABP", "BASELINE", 2, 101, 68,
"1001", "DIABP", "WEEK 2", 3, 200, 72,
"1001", "DIABP", "WEEK 2", 3, 201, 71,
"1001", "DIABP", "WEEK 4", 4, 300, 70,
"1001", "DIABP", "WEEK 4", 4, 301, 70
)
# Flag last value for each patient, test, and visit, baseline observations are ignored
example_vs %>%
restrict_derivation(
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, VSTESTCD, VISIT),
order = exprs(VSTPTNUM),
new_var = LASTFL,
mode = "last"
),
filter = VISIT != "BASELINE"
) %>%
arrange(USUBJID, VSTESTCD, VISITNUM, VSTPTNUM) %>%
select(USUBJID, VSTESTCD, VISIT, VSTPTNUM, VSSTRESN, LASTFL)
#> # A tibble: 8 x 6
#> USUBJID VSTESTCD VISIT VSTPTNUM VSSTRESN LASTFL
#> <chr> <chr> <chr> <dbl> <dbl> <chr>
#> 1 1001 DIABP SCREENING 10 64 NA
#> 2 1001 DIABP SCREENING 11 66 Y
#> 3 1001 DIABP BASELINE 100 68 NA
#> 4 1001 DIABP BASELINE 101 68 NA
#> 5 1001 DIABP WEEK 2 200 72 NA
#> 6 1001 DIABP WEEK 2 201 71 Y
#> 7 1001 DIABP WEEK 4 300 70 NA
#> 8 1001 DIABP WEEK 4 301 70 Y
# Baseline (ABLFL) examples:
input <- tribble(
~STUDYID, ~USUBJID, ~PARAMCD, ~AVISIT, ~ADT, ~AVAL, ~DTYPE,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-27"), 15.0, NA,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-25"), 14.0, NA,
"TEST01", "PAT01", "PARAM01", "BASELINE", as.Date("2021-04-23"), 15.0, "AVERAGE",
"TEST01", "PAT01", "PARAM01", "WEEK 1", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT01", "PARAM01", "WEEK 2", as.Date("2021-04-30"), 12.0, NA,
"TEST01", "PAT02", "PARAM01", "SCREENING", as.Date("2021-04-27"), 15.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "BASELINE", as.Date("2021-04-25"), 14.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "BASELINE", as.Date("2021-04-23"), 15.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "WEEK 1", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT02", "PARAM01", "WEEK 2", as.Date("2021-04-30"), 12.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-27"), 15.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-25"), 14.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "SCREENING", as.Date("2021-04-23"), 15.0, NA,
"TEST01", "PAT01", "PARAM02", "BASELINE", as.Date("2021-04-27"), 10.0, "AVERAGE",
"TEST01", "PAT01", "PARAM02", "WEEK 2", as.Date("2021-04-30"), 12.0, NA,
"TEST01", "PAT02", "PARAM02", "SCREENING", as.Date("2021-04-27"), 15.0, NA,
"TEST01", "PAT02", "PARAM02", "BASELINE", as.Date("2021-04-25"), 14.0, NA,
"TEST01", "PAT02", "PARAM02", "WEEK 1", as.Date("2021-04-23"), 15.0, NA,
"TEST01", "PAT02", "PARAM02", "WEEK 1", as.Date("2021-04-27"), 10.0, NA,
"TEST01", "PAT02", "PARAM02", "BASELINE", as.Date("2021-04-30"), 12.0, NA
)
# Last observation
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Worst observation - Direction = High
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(AVAL, ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Worst observation - Direction = Lo
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(desc(AVAL), ADT),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 2 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA NA
#> 3 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA Y
#> 4 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 5 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 6 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 7 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 8 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA Y
#> 9 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 10 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 11 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 12 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 13 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 15 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 16 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 17 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 18 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 20 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
# Average observation
restrict_derivation(
input,
derivation = derive_var_extreme_flag,
args = params(
by_vars = exprs(USUBJID, PARAMCD),
order = exprs(ADT, desc(AVAL)),
new_var = ABLFL,
mode = "last"
),
filter = AVISIT == "BASELINE" & DTYPE == "AVERAGE"
)
#> # A tibble: 20 x 8
#> STUDYID USUBJID PARAMCD AVISIT ADT AVAL DTYPE ABLFL
#> <chr> <chr> <chr> <chr> <date> <dbl> <chr> <chr>
#> 1 TEST01 PAT01 PARAM01 BASELINE 2021-04-23 15 AVERAGE Y
#> 2 TEST01 PAT01 PARAM02 BASELINE 2021-04-27 10 AVERAGE Y
#> 3 TEST01 PAT02 PARAM01 BASELINE 2021-04-23 15 AVERAGE NA
#> 4 TEST01 PAT02 PARAM01 BASELINE 2021-04-25 14 AVERAGE Y
#> 5 TEST01 PAT01 PARAM01 BASELINE 2021-04-27 15 NA NA
#> 6 TEST01 PAT01 PARAM01 BASELINE 2021-04-25 14 NA NA
#> 7 TEST01 PAT01 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 8 TEST01 PAT01 PARAM01 WEEK 2 2021-04-30 12 NA NA
#> 9 TEST01 PAT02 PARAM01 SCREENING 2021-04-27 15 AVERAGE NA
#> 10 TEST01 PAT02 PARAM01 WEEK 1 2021-04-27 10 AVERAGE NA
#> 11 TEST01 PAT02 PARAM01 WEEK 2 2021-04-30 12 AVERAGE NA
#> 12 TEST01 PAT01 PARAM02 SCREENING 2021-04-27 15 AVERAGE NA
#> 13 TEST01 PAT01 PARAM02 SCREENING 2021-04-25 14 AVERAGE NA
#> 14 TEST01 PAT01 PARAM02 SCREENING 2021-04-23 15 NA NA
#> 15 TEST01 PAT01 PARAM02 WEEK 2 2021-04-30 12 NA NA
#> 16 TEST01 PAT02 PARAM02 SCREENING 2021-04-27 15 NA NA
#> 17 TEST01 PAT02 PARAM02 BASELINE 2021-04-25 14 NA NA
#> 18 TEST01 PAT02 PARAM02 WEEK 1 2021-04-23 15 NA NA
#> 19 TEST01 PAT02 PARAM02 WEEK 1 2021-04-27 10 NA NA
#> 20 TEST01 PAT02 PARAM02 BASELINE 2021-04-30 12 NA NA
# OCCURDS Examples
example_ae <- tribble(
~USUBJID, ~AEBODSYS, ~AEDECOD, ~AESEV, ~AESTDY, ~AESEQ,
"1015", "GENERAL DISORDERS", "ERYTHEMA", "MILD", 2, 1,
"1015", "GENERAL DISORDERS", "PRURITUS", "MILD", 2, 2,
"1015", "GI DISORDERS", "DIARRHOEA", "MILD", 8, 3,
"1023", "CARDIAC DISORDERS", "AV BLOCK", "MILD", 22, 4,
"1023", "SKIN DISORDERS", "ERYTHEMA", "MILD", 3, 1,
"1023", "SKIN DISORDERS", "ERYTHEMA", "SEVERE", 5, 2,
"1023", "SKIN DISORDERS", "ERYTHEMA", "MILD", 8, 3
)
# Most severe AE first occurrence per patient
example_ae %>%
mutate(
TEMP_AESEVN =
as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
) %>%
derive_var_extreme_flag(
new_var = AOCCIFL,
by_vars = exprs(USUBJID),
order = exprs(TEMP_AESEVN, AESTDY, AESEQ),
mode = "first"
) %>%
arrange(USUBJID, AESTDY, AESEQ) %>%
select(USUBJID, AEDECOD, AESEV, AESTDY, AESEQ, AOCCIFL)
#> # A tibble: 7 x 6
#> USUBJID AEDECOD AESEV AESTDY AESEQ AOCCIFL
#> <chr> <chr> <chr> <dbl> <dbl> <chr>
#> 1 1015 ERYTHEMA MILD 2 1 Y
#> 2 1015 PRURITUS MILD 2 2 NA
#> 3 1015 DIARRHOEA MILD 8 3 NA
#> 4 1023 ERYTHEMA MILD 3 1 NA
#> 5 1023 ERYTHEMA SEVERE 5 2 Y
#> 6 1023 ERYTHEMA MILD 8 3 NA
#> 7 1023 AV BLOCK MILD 22 4 NA
# Most severe AE first occurrence per patient per body system
example_ae %>%
mutate(
TEMP_AESEVN =
as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))
) %>%
derive_var_extreme_flag(
new_var = AOCCSIFL,
by_vars = exprs(USUBJID, AEBODSYS),
order = exprs(TEMP_AESEVN, AESTDY, AESEQ),
mode = "first"
) %>%
arrange(USUBJID, AESTDY, AESEQ) %>%
select(USUBJID, AEBODSYS, AESEV, AESTDY, AOCCSIFL)
#> # A tibble: 7 x 5
#> USUBJID AEBODSYS AESEV AESTDY AOCCSIFL
#> <chr> <chr> <chr> <dbl> <chr>
#> 1 1015 GENERAL DISORDERS MILD 2 Y
#> 2 1015 GENERAL DISORDERS MILD 2 NA
#> 3 1015 GI DISORDERS MILD 8 Y
#> 4 1023 SKIN DISORDERS MILD 3 NA
#> 5 1023 SKIN DISORDERS SEVERE 5 Y
#> 6 1023 SKIN DISORDERS MILD 8 NA
#> 7 1023 CARDIAC DISORDERS MILD 22 Y