Skip to contents

The function ensures that the by variables are contained in all source datasets.

Usage

extend_source_datasets(source_datasets, by_vars)

Arguments

source_datasets

Source datasets

A named list of datasets is expected. Each dataset must contain either all by variables or none of the by variables.

by_vars

By variables, given as a list of expressions, e.g. `exprs(AEDECOD)`

Value

The list of extended source datasets

Details

  1. The by groups are determined as the union of the by groups occurring in the source datasets.

  2. Each source dataset that does not contain the by variables is replaced by the Cartesian product of that dataset and the by groups.

Examples

library(tibble)
library(dplyr, warn.conflicts = FALSE)
library(lubridate)

# Subject-level dataset. It has no AEDECOD column, i.e. it contains none of
# the by variables used below.
adsl <- tibble(
  USUBJID = c("01", "02"),
  TRTSDT = ymd(c("2020-12-06", "2021-01-16")),
  EOSDT = ymd(c("2021-03-06", "2021-02-03")),
  STUDYID = "AB42"
)

# Adverse event dataset. It carries the by variable AEDECOD, so its values
# ("Flu", "Cough") define the by groups.
ae <- tibble(
  USUBJID = c("01", "01", "01"),
  AESTDTC = c("2021-01-03T10:56", "2021-03-04", "2021"),
  AESEQ = c(1, 2, 3),
  AEDECOD = c("Flu", "Cough", "Flu"),
  STUDYID = "AB42"
)

# Datasets without the by variables are replaced by their Cartesian product
# with the by groups; datasets that already contain them are kept as they are.
extend_source_datasets(
  list(adsl = adsl, ae = ae),
  by_vars = exprs(AEDECOD)
)
#> $adsl
#> # A tibble: 4 x 5
#>   AEDECOD USUBJID TRTSDT     EOSDT      STUDYID
#>   <chr>   <chr>   <date>     <date>     <chr>  
#> 1 Flu     01      2020-12-06 2021-03-06 AB42   
#> 2 Flu     02      2021-01-16 2021-02-03 AB42   
#> 3 Cough   01      2020-12-06 2021-03-06 AB42   
#> 4 Cough   02      2021-01-16 2021-02-03 AB42   
#> 
#> $ae
#> # A tibble: 3 x 5
#>   USUBJID AESTDTC          AESEQ AEDECOD STUDYID
#>   <chr>   <chr>            <dbl> <chr>   <chr>  
#> 1 01      2021-01-03T10:56     1 Flu     AB42   
#> 2 01      2021-03-04           2 Cough   AB42   
#> 3 01      2021                 3 Flu     AB42   
#>