Bob’s Burgers Episode Fingerprints by Season

Analyzing dialogue patterns across seasons

Author

Steven Ponce

Published

November 11, 2024

Figure 1: A series of radar charts showing dialogue patterns across 14 seasons of Bob’s Burgers. Each season chart displays metrics including Dialogue Density, Average Length, Sentiment Variance, Unique Words, Question Ratio, and Exclamation Ratio. Light purple polygons represent individual episodes, while dark purple lines show season averages, revealing how dialogue patterns evolved throughout the series.

Steps to Create this Graphic

1. Load Packages & Setup

Show code

```{r}
#| label: load
#| warning: false

## 1. LOAD PACKAGES & SETUP ----
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  tidyverse,         # Easily Install and Load the 'Tidyverse'
  ggtext,            # Improved Text Rendering Support for 'ggplot2'
  showtext,          # Using Fonts More Easily in R Graphs
  janitor,           # Simple Tools for Examining and Cleaning Dirty Data
  skimr,             # Compact and Flexible Summaries of Data
  scales,            # Scale Functions for Visualization
  glue,              # Interpreted String Literals
  bobsburgersR,      # Bob's Burgers Datasets for Data Visualization
  tidytext,          # Text Mining using 'dplyr', 'ggplot2', and Other Tidy Tools 
  textdata,          # Download and Load Various Text Datasets 
  patchwork          # The Composer of Plots
)    

### |- figure size ----
camcorder::gg_record(
  dir    = here::here("temp_plots"),
  device = "png",
  width  =  9,
  height =  10,
  units  = "in",
  dpi    = 320
)

### |- resolution ----
showtext_opts(dpi = 320, regular.wt = 300, bold.wt = 800)
```

2. Read in the Data

Show code

```{r}
#| label: read
#| include: true
#| eval: true
#| results: 'hide'
#| warning: false

bobsburgersR::transcript_data
```

3. Examine the Data

Show code

```{r}
#| label: examine
#| include: true
#| eval: true
#| results: 'hide'
#| warning: false

glimpse(transcript_data)
skim(transcript_data)
```

4. Tidy Data

Show code

```{r}
#| label: tidy
#| warning: false

# Calculate metrics 
episode_metrics <- transcript_data |>
  filter(!is.na(dialogue)) |>
  group_by(season, episode) |>
  summarise(
    # Basic dialogue metrics
    dialogue_density = n() / max(line),
    avg_length       = mean(str_length(dialogue)),
    
    # Sentiment analysis - AFINN Sentiment Lexicon
    sentiment_variance = dialogue |>
      tibble(text = _) |>
      unnest_tokens(word, text) |>
      inner_join(get_sentiments("afinn"), by = "word") |>
      pull(value) |>
      var(na.rm = TRUE),
    
    # Word and punctuation metrics  
    unique_words      = dialogue |>
      str_split("\\s+") |>
      unlist() |>
      n_distinct(),
    
    question_ratio    = mean(str_detect(dialogue, "\\?")),
    exclamation_ratio = mean(str_detect(dialogue, "!")),
    
    .groups = "drop"
  ) |>
  # Scale all metrics
  mutate(across(dialogue_density:exclamation_ratio, scale))


# Prepare data for visualization 
episode_metrics_long <- episode_metrics |>
  pivot_longer(
    cols = c(dialogue_density:exclamation_ratio),
    names_to = "metric",
    values_to = "value"
  ) |>
  mutate(
    angle = (as.numeric(factor(metric)) - 1) * 2 * pi / 6,
    hjust = ifelse(angle < pi, 1, 0),
    metric = case_when(
      metric == "dialogue_density" ~ "Dialogue\nDensity",
      metric == "avg_length" ~ "Average\nLength",
      metric == "sentiment_variance" ~ "Sentiment\nVariance",
      metric == "unique_words" ~ "Unique\nWords",
      metric == "question_ratio" ~ "Question\nRatio",
      metric == "exclamation_ratio" ~ "Exclamation\nRatio"
    )
  )

# Filter data to remove empty groups
episode_metrics_long <- episode_metrics_long |>
  filter(!is.na(value)) |>
  group_by(season) |>
  filter(n() > 1) |>  
  ungroup()
```

5. Visualization Parameters

Show code

```{r}
#| label: params
#| include: true
#| warning: false

### |- plot aesthetics ----
bkg_col      <- "#f5f5f2"  
title_col    <- "gray20"           
subtitle_col <- "gray20"     
caption_col  <- "gray30"   
text_col     <- "gray20"   

### |-  titles and caption ----
# icons
tt <- str_glue("Source: {{bobsburgersR}}")
li <- str_glue("<span style='font-family:fa6-brands'>&#xf08c;</span>")
gh <- str_glue("<span style='font-family:fa6-brands'>&#xf09b;</span>")
bs <- str_glue("<span style='font-family:fa6-brands'>&#xe671; </span>")

# text
light_purple <- str_glue("<span style='color:#A374C2'>**Light Purple**</span>")
dark_purple  <- str_glue("<span style='color:#8856a7'>**Dark Purple**</span>")

title_text    <- str_glue("Bob's Burgers Episode Fingerprints by Season")
subtitle_text <- str_glue("Analyzing dialogue patterns across seasons<br><br>
                          **Note:** Metrics are standardized (**z-scores**). { light_purple } polygons represent individual episodes.<br>
                          { dark_purple } line shows season average.")
caption_text  <- str_glue("{li} stevenponce &bull; {gh} poncest &bull; #rstats #ggplot2 &bull; {tt}")

### |-  fonts ----
font_add("fa6-brands", here::here("fonts/6.4.2/Font Awesome 6 Brands-Regular-400.otf"))
font_add_google("Oswald", regular.wt = 400, family = "title")
font_add_google("Source Sans Pro", family = "subtitle")
font_add_google("Source Sans Pro", family = "text")  
font_add_google("Roboto Mono", family = "numbers")  
font_add_google("Noto Sans", regular.wt = 400, family = "caption")
showtext_auto(enable = TRUE)


### |-  plot theme ----
theme_set(theme_minimal(base_size = 14, base_family = "text"))                

theme_update(
  plot.title.position   = "plot",
  plot.caption.position = "plot",
  legend.position       = "plot",
  plot.background       = element_rect(fill = bkg_col, color = bkg_col),
  panel.background      = element_rect(fill = bkg_col, color = bkg_col),
  plot.margin           = margin(t = 10, r = 10, b = 10, l = 10),
  axis.title.x          = element_text(margin = margin(10, 0, 0, 0), size = rel(1.1), 
                                       color = text_col, family = "text", face = "bold", hjust = 0.5),
  axis.title.y          = element_text(margin = margin(0, 10, 0, 0), size = rel(1.1), 
                                       color = text_col, family = "text", face = "bold", hjust = 0.5),
  axis.text             = element_text(size = rel(0.5), color = text_col, family = "text"),
  strip.text            = element_text(size = rel(1), face = "bold", margin = margin(b = 10), family = "text"),
  panel.grid.major      = element_line(color = "gray90", linewidth = 0.2),
  panel.spacing.x       = unit(3, "lines"),  
  panel.spacing.y       = unit(1, "lines"),  
  aspect.ratio          = 1  
)
```

6. Plot

Show code

```{r}
### |- main plot ---- 
main_plot <- episode_metrics_long |>   
  ggplot(aes(x = metric, y = value)) +

  # Geoms
  # Add grid lines
  geom_hline(yintercept = seq(-2, 7, by = 1), color = "gray90", linewidth = 0.2) +
  
  # Add episode polygons
  geom_polygon(aes(group = interaction(season, episode)),
               fill = "#8856a7",
               alpha = 0.2) +
  
  # Add season average line
  stat_summary(aes(group = season),
               fun = 'mean', 
               geom = "path",
               color = "#8856a7",
               size = 0.8,
               alpha = 0.9,
               na.rm = TRUE) +
  
  # Scales
  scale_x_discrete(expand = expansion(add = 1.2)) +  
  scale_y_continuous(
    expand = expansion(add = c(0.5, 1)),
    limits = c(-2, 7)                 
  ) +
  coord_polar(clip = 'off') +

  # Labs
  labs(
    x = NULL,
    y = NULL,
  ) +
  
  # Facet 
  facet_wrap(~season, nrow = 4, 
             labeller = labeller(season = function(x) paste("Season", x))
  ) +
  
  # Theme
  theme(
    axis.text.y = element_blank(),
    axis.title = element_blank(),
    plot.margin = margin(0, 0, 0, 0)
  )

### |- key pattern plot ----
key_patterns_plot <- ggplot() +
  annotate(
    "richtext",
    x = 0,
    y = 0,
    label = glue::glue(
      "<span style='font-family:title;font-size:12pt;color:{title_col}'>**Key Patterns:**</span><br>
      <span style='font-family:subtitle;font-size:9pt;color:{text_col}'>
      • Early seasons (1-3): more experimental patterns<br>
      • Middle seasons (4-8): consistent style<br>
      • Later seasons: refined structure<br>
      • Higher variance: character episodes<br>
      • Higher question ratios: mystery plots
      </span>"
    ),
    fill = NA,
    label.color = NA,
    hjust = 0,
    vjust = 1.2,
  ) +
  theme_void() +
  theme(
    plot.margin = margin(5, 10, 5, 10)      
  )

### |- title & subtitle plot ----
title_plot <- ggplot() +
  labs(
    title = title_text,
    subtitle = subtitle_text
  ) +
  theme_void() +
  theme(
    plot.title      = element_text(
      size          = rel(1.8),
      family        = "title",
      face          = "bold",
      color         = title_col,
      lineheight    = 1.1,
      margin        = margin(t = 5, b = 5)
    ),   
    plot.subtitle   = element_markdown(
      size          = rel(1.1),
      family        = "subtitle",
      color         = subtitle_col,
      lineheight    = 1.1,
      margin        = margin(t = 5, b = 15)
    ),
    plot.background = element_rect(fill = bkg_col, color = bkg_col),
    plot.margin     = margin(10, 10, 0, 10)
  )

### |- combined plot ----

# Define layout design with adjusted areas

# area(t, l, b, r)

# where:
# t = top row position
# l = left column position
# b = bottom row position
# r = right column position

design <- c(
    area(1, 1, 1, 6),      # title area
    area(2, 1, 5, 6),      # main plot area
    area(4, 2, 5, 6)       # key patterns area 
)

combined_plot <- title_plot +  main_plot + key_patterns_plot +
  plot_layout(
    design = design,
    heights = c(0.8, 4, 4, 4, 0.2), 
    widths = c(1, 1, 1, 1, 1, 0.2)
  ) +
  plot_annotation(
    caption = caption_text,
    theme = theme(
      plot.background = element_rect(fill = bkg_col, color = bkg_col),
      plot.margin     = margin(10, 10, 10, 10),
      plot.caption    = element_markdown(
        size          = rel(0.65),
        family        = "caption",
        color         = caption_col,
        lineheight    = 1.1,
        hjust         = 0.5,
        margin        = margin(t = 10, b = 5)
      )
    )
  )
```

7. Save

Show code

```{r}
#| label: save
#| warning: false

# There was some issues between patchwork and ggsave

# Make sure these pkgs are installed
if (!require("ggplotify")) install.packages("ggplotify")
if (!require("grid")) install.packages("grid")

# Use Arial (Windows system font)
windowsFonts(Arial = windowsFont("Arial"))

font_add("fa6-brands", here::here("fonts/6.4.2/Font Awesome 6 Brands-Regular-400.otf"))

# Convert patchwork plot to grob
plot_grob <- as.grob(combined_plot)

# Set up the PNG device with proper font handling
png(
  filename = here::here("projects/standalone_visualizations/sa_2024-11-11.png"),
  width = 9, 
  height = 10, 
  units = "in", 
  res = 320,
  type = "cairo" 
)

# Enable showtext with specific settings
showtext::showtext_begin()
showtext::showtext_opts(dpi = 320, regular.wt = 300, bold.wt = 800)

# Draw the plot
grid::grid.draw(plot_grob)

# Clean up
showtext::showtext_end()
invisible(dev.off())

# Create thumbnail
magick::image_read(here::here("projects/standalone_visualizations/sa_2024-11-11.png")) |> 
  magick::image_resize(geometry = "400") |> 
  magick::image_write(here::here("projects/standalone_visualizations/thumbnails/sa_2024-11-11.png"))
```

8. Session Info

Expand for Session Info

R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 22631)

Matrix products: default


locale:
[1] LC_COLLATE=English_United States.utf8 
[2] LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: America/New_York
tzcode source: internal

attached base packages:
[1] grid      stats     graphics  grDevices datasets  utils     methods  
[8] base     

other attached packages:
 [1] ggplotify_0.1.2         patchwork_1.3.0         textdata_0.4.5         
 [4] tidytext_0.4.2          bobsburgersR_0.0.0.9000 glue_1.8.0             
 [7] scales_1.3.0            skimr_2.1.5             janitor_2.2.0          
[10] showtext_0.9-7          showtextdb_3.0          sysfonts_0.8.9         
[13] ggtext_0.1.2            lubridate_1.9.3         forcats_1.0.0          
[16] stringr_1.5.1           dplyr_1.1.4             purrr_1.0.2            
[19] readr_2.1.5             tidyr_1.3.1             tibble_3.2.1           
[22] ggplot2_3.5.1           tidyverse_2.0.0         pacman_0.5.1           

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.1   farver_2.1.2       fastmap_1.2.0      janeaustenr_1.0.0 
 [5] digest_0.6.37      timechange_0.3.0   lifecycle_1.0.4    rsvg_2.6.1        
 [9] tokenizers_0.3.0   magrittr_2.0.3     compiler_4.4.0     rlang_1.1.6       
[13] tools_4.4.0        utf8_1.2.4         yaml_2.3.10        knitr_1.49        
[17] labeling_0.4.3     htmlwidgets_1.6.4  curl_6.0.0         here_1.0.1        
[21] xml2_1.3.6         camcorder_0.1.0    repr_1.1.7         withr_3.0.2       
[25] fansi_1.0.6        colorspace_2.1-1   cli_3.6.4          rmarkdown_2.29    
[29] generics_0.1.3     rstudioapi_0.17.1  tzdb_0.5.0         commonmark_1.9.2  
[33] base64enc_0.1-3    vctrs_0.6.5        yulab.utils_0.1.8  Matrix_1.7-0      
[37] jsonlite_1.8.9     gridGraphics_0.5-1 hms_1.1.3          systemfonts_1.1.0 
[41] magick_2.8.5       gifski_1.32.0-1    codetools_0.2-20   stringi_1.8.4     
[45] gtable_0.3.6       munsell_0.5.1      pillar_1.9.0       rappdirs_0.3.3    
[49] htmltools_0.5.8.1  R6_2.5.1           rprojroot_2.0.4    evaluate_1.0.1    
[53] lattice_0.22-6     markdown_1.13      SnowballC_0.7.1    gridtext_0.1.5    
[57] snakecase_0.11.1   renv_1.0.3         Rcpp_1.0.13-1      svglite_2.1.3     
[61] xfun_0.49          fs_1.6.5           pkgconfig_2.0.3

9. GitHub Repository

Expand for GitHub Repo

Access the GitHub repository here