Update a Bayesian Mallows model estimated using the Metropolis-Hastings
algorithm in compute_mallows()
using the sequential Monte Carlo algorithm
described in
steinSequentialInferenceMallows2023;textualBayesMallows.
Usage
update_mallows(model, new_data, ...)
# S3 method for class 'BayesMallowsPriorSamples'
update_mallows(
model,
new_data,
model_options = set_model_options(),
smc_options = set_smc_options(),
compute_options = set_compute_options(),
priors = model$priors,
pfun_estimate = NULL,
...
)
# S3 method for class 'BayesMallows'
update_mallows(
model,
new_data,
model_options = set_model_options(),
smc_options = set_smc_options(),
compute_options = set_compute_options(),
priors = model$priors,
...
)
# S3 method for class 'SMCMallows'
update_mallows(model, new_data, ...)
Arguments
- model
A model object of class "BayesMallows" returned from
compute_mallows()
, an object of class "SMCMallows" returned from this function, or an object of class "BayesMallowsPriorSamples" returned fromsample_prior()
.- new_data
An object of class "BayesMallowsData" returned from
setup_rank_data()
. The object should contain the new data being provided.- ...
Optional arguments. Currently not used.
- model_options
An object of class "BayesMallowsModelOptions" returned from
set_model_options()
.- smc_options
An object of class "SMCOptions" returned from
set_smc_options()
.- compute_options
An object of class "BayesMallowsComputeOptions" returned from
set_compute_options()
.- priors
An object of class "BayesMallowsPriors" returned from
set_priors()
. Defaults to the priors used inmodel
.- pfun_estimate
Object returned from
estimate_partition_function()
. Defaults toNULL
, and will only be used for footrule, Spearman, or Ulam distances when the cardinalities are not available, cf.get_cardinalities()
. Only used by the specialization for objects of type "BayesMallowsPriorSamples".
See also
Other modeling:
burnin()
,
burnin<-()
,
compute_mallows()
,
compute_mallows_mixtures()
,
compute_mallows_sequentially()
,
sample_prior()
Examples
if (FALSE) { # \dontrun{
set.seed(1)
# UPDATING A MALLOWS MODEL WITH NEW COMPLETE RANKINGS
# Assume we first only observe the first four rankings in the potato_visual
# dataset
data_first_batch <- potato_visual[1:4, ]
# We start by fitting a model using Metropolis-Hastings
mod_init <- compute_mallows(
data = setup_rank_data(data_first_batch),
compute_options = set_compute_options(nmc = 10000))
# Convergence seems good after no more than 2000 iterations
assess_convergence(mod_init)
burnin(mod_init) <- 2000
# Next, assume we receive four more observations
data_second_batch <- potato_visual[5:8, ]
# We can now update the model using sequential Monte Carlo
mod_second <- update_mallows(
model = mod_init,
new_data = setup_rank_data(rankings = data_second_batch),
smc_options = set_smc_options(resampler = "systematic")
)
# This model now has a collection of particles approximating the posterior
# distribution after the first and second batch
# We can use all the posterior summary functions as we do for the model
# based on compute_mallows():
plot(mod_second)
plot(mod_second, parameter = "rho", items = 1:4)
compute_posterior_intervals(mod_second)
# Next, assume we receive the third and final batch of data. We can update
# the model again
data_third_batch <- potato_visual[9:12, ]
mod_final <- update_mallows(
model = mod_second, new_data = setup_rank_data(rankings = data_third_batch))
# We can plot the same things as before
plot(mod_final)
compute_consensus(mod_final)
# UPDATING A MALLOWS MODEL WITH NEW OR UPDATED PARTIAL RANKINGS
# The sequential Monte Carlo algorithm works for data with missing ranks as
# well. This both includes the case where new users arrive with partial ranks,
# and when previously seen users arrive with more complete data than they had
# previously.
# We illustrate for top-k rankings of the first 10 users in potato_visual
potato_top_10 <- ifelse(potato_visual[1:10, ] > 10, NA_real_,
potato_visual[1:10, ])
potato_top_12 <- ifelse(potato_visual[1:10, ] > 12, NA_real_,
potato_visual[1:10, ])
potato_top_14 <- ifelse(potato_visual[1:10, ] > 14, NA_real_,
potato_visual[1:10, ])
# We need the rownames as user IDs
(user_ids <- 1:10)
# First, users provide top-10 rankings
mod_init <- compute_mallows(
data = setup_rank_data(rankings = potato_top_10, user_ids = user_ids),
compute_options = set_compute_options(nmc = 10000))
# Convergence seems fine. We set the burnin to 2000.
assess_convergence(mod_init)
burnin(mod_init) <- 2000
# Next assume the users update their rankings, so we have top-12 instead.
mod1 <- update_mallows(
model = mod_init,
new_data = setup_rank_data(rankings = potato_top_12, user_ids = user_ids),
smc_options = set_smc_options(resampler = "stratified")
)
plot(mod1)
# Then, assume we get even more data, this time top-14 rankings:
mod2 <- update_mallows(
model = mod1,
new_data = setup_rank_data(rankings = potato_top_14, user_ids = user_ids)
)
plot(mod2)
# Finally, assume a set of new users arrive, who have complete rankings.
potato_new <- potato_visual[11:12, ]
# We need to update the user IDs, to show that these users are different
(user_ids <- 11:12)
mod_final <- update_mallows(
model = mod2,
new_data = setup_rank_data(rankings = potato_new, user_ids = user_ids)
)
plot(mod_final)
# We can also update models with pairwise preferences
# We here start by running MCMC on the first 20 assessors of the beach data
# A realistic application should run a larger number of iterations than we
# do in this example.
set.seed(3)
dat <- subset(beach_preferences, assessor <= 20)
mod <- compute_mallows(
data = setup_rank_data(
preferences = beach_preferences),
compute_options = set_compute_options(nmc = 3000, burnin = 1000)
)
# Next we provide assessors 21 to 24 one at a time. Note that we sample the
# initial augmented rankings in each particle for each assessor from 200
# different topological sorts consistent with their transitive closure.
for(i in 21:24){
mod <- update_mallows(
model = mod,
new_data = setup_rank_data(
preferences = subset(beach_preferences, assessor == i),
user_ids = i),
smc_options = set_smc_options(latent_sampling_lag = 0,
max_topological_sorts = 200)
)
}
# Compared to running full MCMC, there is a downward bias in the scale
# parameter. This can be alleviated by increasing the number of particles,
# MCMC steps, and the latent sampling lag.
plot(mod)
compute_consensus(mod)
} # }