Creating a Marketing Mix Model with Meta Robyn: A Step-by-Step Guide

In the dynamic landscape of digital marketing, staying ahead requires not only creativity but also a data-driven approach. One powerful tool in the marketer’s arsenal is the Robyn package, a versatile R library designed to optimize marketing mix models. Let’s break down how you can leverage Robyn to enhance your marketing strategies.

Setting Up the Robyn Environment

Before diving into the data, the first step is setting up the environment. This involves installing and loading the Robyn package, ensuring you have the latest version, and configuring the environment, specifying whether to create local files.

# Install and load the Robyn package (CRAN release)
install.packages("Robyn")
library(Robyn)

# Update Robyn if a newer version is available.
# NOTE: the first argument of update.packages() is `lib.loc` (a library path),
# not a package name, so update.packages("Robyn") would be misinterpreted.
# Target a single package via the `oldPkgs` argument instead.
update.packages(oldPkgs = "Robyn", ask = FALSE)

Loading Data for Analysis

With the environment ready, the next crucial step is loading the data. You can either use the simulated dataset bundled with Robyn or load your own. Additionally, you can load the holiday calendar that Prophet uses to model holiday effects.

# Load the demo dataset shipped with Robyn (or substitute your own data here)
data("dt_simulated_weekly", package = "Robyn")

# Holiday calendar consumed by Prophet's holiday component
data("dt_prophet_holidays", package = "Robyn")

Model Specification and Calibration

Now comes the heart of the process—defining the input variables for the model, specifying hyperparameters for adstock and saturation, and optionally calibrating the model using experimental input.

# Collect all model inputs. robyn_inputs() validates the dataset, the
# variable roles and the modelling window before any model is run.
InputCollect <- robyn_inputs(
  dt_input = dt_simulated_weekly,
  dt_holidays = dt_prophet_holidays,
  date_var = "DATE", # date column; format must be "2020-01-01" (ISO yyyy-mm-dd)
  dep_var = "revenue", # there must be exactly one dependent variable
  dep_var_type = "revenue", # "revenue" (ROI) or "conversion" (CPA)
  prophet_vars = c("trend", "season", "holiday"), # from "trend", "season", "weekday", "holiday"
  prophet_country = "DE", # country code; must exist in dt_prophet_holidays
  context_vars = c("competitor_sales_B", "events"), # e.g. competitors, discounts, unemployment
  paid_media_spends = c("tv_S", "ooh_S", "print_S", "facebook_S", "search_S"), # mandatory input
  paid_media_vars = c("tv_S", "ooh_S", "print_S", "facebook_I", "search_clicks_P"), # mandatory.
  # paid_media_vars must have the same order as paid_media_spends. Use media
  # exposure metrics like impressions, GRP etc. If not applicable, use spend instead.
  organic_vars = "newsletter", # marketing activity without media spend
  # factor_vars = c("events"), # force variables in context_vars or organic_vars to be categorical
  window_start = "2016-01-01", # modelling window start (must lie within dt_input dates)
  window_end = "2018-12-31", # modelling window end
  adstock = "geometric" # "geometric", "weibull_cdf" or "weibull_pdf"
)

# Hyperparameter search ranges for Geometric adstock and Hill saturation.
# alphas/gammas shape the saturation curve; thetas is the geometric decay
# rate (share of a period's media effect carried into the next period).
range_alpha <- c(0.5, 3) # Hill curve shape, shared across channels
range_gamma <- c(0.3, 1) # Hill curve inflexion, shared across channels
hyperparameters <- list(
  facebook_S_alphas = range_alpha,
  facebook_S_gammas = range_gamma,
  facebook_S_thetas = c(0, 0.3), # digital: short carryover
  print_S_alphas = range_alpha,
  print_S_gammas = range_gamma,
  print_S_thetas = c(0.1, 0.4),
  tv_S_alphas = range_alpha,
  tv_S_gammas = range_gamma,
  tv_S_thetas = c(0.3, 0.8), # TV: long carryover
  search_S_alphas = range_alpha,
  search_S_gammas = range_gamma,
  search_S_thetas = c(0, 0.3),
  ooh_S_alphas = range_alpha,
  ooh_S_gammas = range_gamma,
  ooh_S_thetas = c(0.1, 0.4),
  newsletter_alphas = range_alpha,
  newsletter_gammas = range_gamma,
  newsletter_thetas = c(0.1, 0.4),
  train_size = c(0.5, 0.8) # share of data used for training in ts validation
)

InputCollect <- robyn_inputs(InputCollect = InputCollect, hyperparameters = hyperparameters)

# Optionally calibrate the model with experiment (lift test) results.
# Each row is one experiment; all dates must fall inside the input data range.
calibration_input <- data.frame(
  # Channel name must be in paid_media_vars; joint tests are joined with "+"
  channel = c("facebook_S", "tv_S", "facebook_S+search_S", "newsletter"),
  # Experiment start dates (within input data range)
  liftStartDate = as.Date(c("2018-05-01", "2018-04-03", "2018-07-01", "2017-12-01")),
  # Experiment end dates (within input data range)
  liftEndDate = as.Date(c("2018-06-10", "2018-06-03", "2018-07-20", "2017-12-31")),
  # Measured absolute lift; same campaign level and same metric as dep_var_type
  liftAbs = c(400000, 300000, 700000, 200),
  # Spend during the experiment; should match dt_input spend within ~10% error
  spend = c(421000, 7100, 350000, 0),
  # Confidence in the result; for a frequentist experiment, 1 - p-value works
  confidence = c(0.85, 0.8, 0.99, 0.95),
  # KPI measured; must match dep_var
  metric = c("revenue", "revenue", "revenue", "revenue"),
  # "immediate" or "total"; "immediate" recommended for lift-style experiments
  calibration_scope = c("immediate", "immediate", "immediate", "immediate")
)
 InputCollect <- robyn_inputs(InputCollect = InputCollect, calibration_input = calibration_input)

Building the Initial Model

Once the model is specified, it’s time to run it with multiple trials and iterations. Check convergence plots and time-series validation plots to ensure the model’s accuracy. Export the results and plots for further analysis.

# Run the Robyn model with multiple trials and iterations
OutputModels <- robyn_run(
  InputCollect = InputCollect, # feed in the complete model specification
  cores = NULL, # NULL defaults to (max available - 1)
  iterations = 2000, # 2000 recommended for the dummy dataset with no calibration
  trials = 5, # 5 recommended for the dummy dataset
  ts_validation = TRUE, # 3-way-split time series for NRMSE validation
  add_penalty_factor = FALSE # experimental feature; use with caution
)

# Inspect convergence of the multi-objective optimisation (curves should plateau)
OutputModels$convergence$moo_distrb_plot
OutputModels$convergence$moo_cloud_plot

# Flags controlling local export. Defined here because they are referenced
# below (and again later in the script); the original text used them without
# ever defining them. Set create_files <- FALSE to keep results in memory only.
create_files <- TRUE
robyn_directory <- "~/Desktop" # destination folder for exported plots and files

# Aggregate, cluster and export model results and plots
OutputCollect <- robyn_outputs(
  InputCollect, OutputModels,
  pareto_fronts = "auto", # pick enough Pareto fronts to fill min_candidates (default 100)
  # min_candidates = 100, # top Pareto models used for clustering; default 100
  # calibration_constraint = 0.1, # range c(0.01, 0.1); default 0.1
  csv_out = "pareto", # "pareto", "all", or NULL (no CSV export)
  clusters = TRUE, # cluster similar models by ROAS; see ?robyn_clusters
  export = create_files, # write result files locally
  plot_folder = robyn_directory, # path for plot exports and file creation
  plot_pareto = create_files # FALSE deactivates plotting/saving model one-pagers
)

Selecting and Saving a Model

Comparing models is crucial to finding the one that aligns with business reality. Export the selected model as a JSON file for easy reuse, and optionally generate a one-pager plot to sanity-check it.

# Compare candidate models and select the one that best matches business reality
print(OutputCollect)
select_model <- "1_122_7" # Pick one of the models from OutputCollect to proceed

# Export the selected model as a JSON file
# NOTE(review): `create_files` must be defined earlier in the script (e.g. TRUE) —
# it is not defined in this excerpt; confirm it is set before running.
ExportedModel <- robyn_write(InputCollect, OutputCollect, select_model, export = create_files)
print(ExportedModel)

# Optionally plot and check the one-pager for the selected model
myOnePager <- robyn_onepagers(InputCollect, OutputCollect, select_model, export = FALSE)
[Image] A Meta Robyn one-pager example (image credit: Christos Visvardis)

Budget Allocation and Model Refresh

Utilize the selected model to allocate budget based on different scenarios, maximizing response or targeting efficiency. Refresh the model as needed based on new data or added variables.

# Allocate budget under different scenarios using the selected model
# Example 1: "max_response" default setting — maximize response for the latest month
AllocatorCollect1 <- robyn_allocator(
  InputCollect = InputCollect,
  OutputCollect = OutputCollect,
  select_model = select_model,
  # date_range = NULL, # default: last month as initial period
  # total_budget = NULL, # when NULL, defaults to total spend within date_range
  channel_constr_low = 0.7, # lower spend bound (70% of initial), recycled to all channels
  channel_constr_up = c(1.2, 1.5, 1.5, 1.5, 1.5), # upper bounds, one per paid media channel
  # channel_constr_multiplier = 3,
  scenario = "max_response",
  export = create_files # NOTE(review): `create_files` must be defined earlier (e.g. TRUE)
)
# Print & plot the allocator's output
print(AllocatorCollect1)
plot(AllocatorCollect1)

# Refresh the model when new data arrives or variables are added.
# Provide the JSON file holding your InputCollect and ExportedModel
# specifications; it can be any model, initial or an earlier refresh.
json_file <- "~/Desktop/Robyn_202211211853_init/RobynModel-1_100_6.json"
RobynRefresh <- robyn_refresh(
  json_file = json_file,
  dt_input = dt_simulated_weekly,
  dt_holidays = dt_prophet_holidays,
  refresh_steps = 13, # number of new periods (here: weeks) added per refresh
  refresh_iters = 1000, # 1k is an estimation; increase if convergence is poor
  refresh_trials = 1
)

Analyzing Marginal Returns

Finally, analyze marginal returns for a specified metric, such as spending on a specific channel. This step provides valuable insights into the effectiveness of your marketing efforts.

# Analyze marginal returns for a specified metric (e.g. spend on one channel)
## Recreate the original saturation curve for the channel
Response <- robyn_response(
  InputCollect = InputCollect,
  OutputCollect = OutputCollect,
  select_model = select_model,
  metric_name = "facebook_S"
)
Response$plot

## Get the marginal response for a given budget (Spend1) on the channel
Spend1 <- 20000
Response1 <- robyn_response(
  InputCollect = InputCollect,
  OutputCollect = OutputCollect,
  select_model = select_model,
  metric_name = "facebook_S",
  metric_value = Spend1, # total budget for date_range
  date_range = "last_1" # the most recent single period (original comment wrongly said "last two")
)
Response1$plot

In conclusion, the Robyn package offers a comprehensive framework for optimizing marketing strategies. By following these steps, marketers can enhance their decision-making processes, allocate budgets effectively, and continually refine their approach based on real-time data. Stay ahead in the digital marketing game with Robyn’s powerful capabilities.