pulsatile
is an R package for Bayesian modeling of pulsatile hormone time
series data via birth-death MCMC. BDMCMC is an unsupervised statistical learning
algorithm for estimating mixture models with unknown numbers of components.
The data consists of hormone concentration measurements from blood samples typically taken every 6-10 minutes for 6-24 hours. With this specific analysis we are attempting to estimate the number and location of hormone pulses in the sample, as well as individual and sample-mean pulse features like quantity of hormone released (mass), duration of release (width), elimination rate (half-life), and baseline hormone levels.
This package currently models one subjects time-series. We are currently refactoring and developing the population models and two-hormone (driver and response hormones) models.
The development version is available on GitHub.
Example analysis
# Install/load pulsatile with devtools
if (!require(devtools)) { install.packages("devtools") }
## Loading required package: devtools
if (!require(pulsatile)) { install_github("BayesPulse/pulsatile") }
## Loading required package: pulsatile
# Install/load other packages
if (!require(tidyr)) { install.packages("tidyr") }
## Loading required package: tidyr
if (!require(dplyr)) { install.packages("dplyr") }
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
if (!require(ggplot2)) { install.packages("ggplot2") }
## Loading required package: ggplot2
if (!require(ggthemes)) { install.packages("ggthemes") }
## Loading required package: ggthemes
if (!require(coda)) { install.packages("coda") }
## Loading required package: coda
# Set options
theme_set(theme_fivethirtyeight())
options(scipen = 999)
# Generate (reproducible) simulated pulsatile hormone time-series
set.seed(2018-01-23)
this_pulse <- simulate_pulse()
# View/inspect simulated data
str(this_pulse)
## List of 3
## $ call : language simulate_pulse(num_obs = 144, interval = 10, error_var = 0.005, ipi_mean = 12, ipi_var = 40, ipi_min = 4, ma| __truncated__ ...
## $ data :Classes 'tbl_df', 'tbl' and 'data.frame': 144 obs. of 3 variables:
## ..$ observation : num [1:144] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ time : num [1:144] 10 20 30 40 50 60 70 80 90 100 ...
## ..$ concentration: num [1:144] 3.11 3.82 3.04 3.14 3.03 ...
## $ parameters:Classes 'tbl_df', 'tbl' and 'data.frame': 18 obs. of 6 variables:
## ..$ pulse_no : num [1:18] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ mass : num [1:18] 4.33 3.36 5.54 10.72 3.77 ...
## ..$ width : num [1:18] 34.6 37.5 35.8 50 33.6 ...
## ..$ location : num [1:18] -91.8 130.3 192.1 292.4 339.6 ...
## ..$ mass_kappa : num [1:18] 1.4157 0.7739 0.6037 0.0637 1.2325 ...
## ..$ width_kappa: num [1:18] 2.396 0.577 0.387 1.194 0.933 ...
## - attr(*, "class")= chr "pulse_sim"
plot(this_pulse)
# Create model specification object (for priors, starting values, and proposal
# variances)
model_spec <- pulse_spec(location_prior_type = "order-statistic")
# Fit model (takes roughly 3-10 minutes)
# Note: fit_pulse accepts a plain dataset with the time and concentration
# variable names specified, or an object created by simulate_pulse()
fit_test <- fit_pulse(.data = this_pulse,
iters = 250000,
thin = 50,
spec = model_spec,
verbose = FALSE)
# Inspect posterior chains
str(fit_test, max.level = 1)
## List of 7
## $ model : chr "single-subject"
## $ call : language fit_pulse(.data = this_pulse, spec = model_spec, iters = 250000, thin = 50, verbose = FALSE)
## $ common_chain:Classes 'tbl_df', 'tbl' and 'data.frame': 4500 obs. of 9 variables:
## $ pulse_chain :Classes 'tbl_df', 'tbl' and 'data.frame': 62661 obs. of 8 variables:
## $ data :List of 3
## ..- attr(*, "class")= chr "pulse_sim"
## $ options :List of 4
## $ spec :List of 4
## ..- attr(*, "class")= chr "pulse_spec"
## - attr(*, "class")= chr "pulse_fit"
common_chain(fit_test)
## # A tibble: 4,500 x 9
## iteration num_pulses baseline mean_pulse_mass mean_pulse_width halflife
## <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 25001 14 3.12 3.56 26.6 45.1
## 2 25051 13 3.12 3.70 27.3 47.2
## 3 25101 13 3.06 3.71 19.3 50.2
## 4 25151 14 3.11 3.36 13.4 49.0
## 5 25201 13 3.18 3.89 6.08 45.0
## 6 25251 13 3.10 3.11 1.85 53.9
## 7 25301 14 2.90 3.47 15.8 58.6
## 8 25351 12 3.01 4.59 29.1 56.0
## 9 25401 13 2.81 3.80 17.7 59.2
## 10 25451 13 3.12 3.62 22.2 51.3
## # ... with 4,490 more rows, and 3 more variables: model_error <dbl>,
## # sd_mass <dbl>, sd_widths <dbl>
pulse_chain(fit_test)
## # A tibble: 62,661 x 8
## iteration total_num_pulses pulse_num location mass width eta_mass
## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 25001 14 0 129 2.05 44.4 0.896
## 2 25001 14 1 189 4.62 66.0 4.59
## 3 25001 14 2 291 6.17 35.1 0.504
## 4 25001 14 3 328 6.29 72.0 1.00
## 5 25001 14 4 440 4.65 43.1 2.00
## 6 25001 14 5 503 3.36 37.0 0.863
## 7 25001 14 6 521 2.56 5.19 0.158
## 8 25001 14 7 671 4.34 56.9 1.40
## 9 25001 14 8 735 3.58 32.8 1.63
## 10 25001 14 9 791 2.50 64.8 0.0270
## # ... with 62,651 more rows, and 1 more variable: eta_width <dbl>
# Review traceplot and posterior distributions
bp_trace(fit_test)
bp_posteriors(fit_test)
## Warning in if (type == "histogram") {: the condition has length > 1 and
## only the first element will be used
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
bp_location_posterior(fit_test)
# Calculate mean and 80% HPD credible interval
rbind(
common_chain(fit_test) %>%
summarise_at(vars(num_pulses:sd_widths), mean),
common_chain(fit_test) %>%
select(-iteration) %>%
as.mcmc %>%
HPDinterval(., prob = 0.8) %>% t
) %>%
mutate(stat = c("mean", "lower", "upper")) %>%
gather(key = parameter, value = value, -stat) %>%
spread(key = stat, value = value)
## # A tibble: 8 x 4
## parameter lower mean upper
## <chr> <dbl> <dbl> <dbl>
## 1 baseline 2.86 3.01 3.21
## 2 halflife 35.5 42.9 49.4
## 3 mean_pulse_mass 3.06 4.13 4.98
## 4 mean_pulse_width 6.85 19.0 30.5
## 5 model_error 0.0140 0.0183 0.0216
## 6 num_pulses 12.0 13.9 15.0
## 7 sd_mass 0.838 1.98 2.92
## 8 sd_widths 19.5 71.3 120