---
title: "Musical Journey Through Numbers"
author: "Asitav Sen"
date: "2025-04-07"
date-modified: "2025-04-07"
categories: [visualization,R,analysis]
image: "Pink_Floyd_Albums.png"
format:
  html:
    page-layout: article
    toc: false
    lightbox: auto
---
```{r init, message=FALSE, warning=FALSE, include=FALSE, paged.print=FALSE}
library(highcharter)
library(jsonlite)
library(data.table)
library(lubridate)
library(ggbump)
library(ggplot2)
library(ggrepel)
library(plotly)
# #Read JSON file
# data <- fromJSON("Apple Music Library Tracks.json")
#
# # Create dataframe with selected columns
# df <- data.frame(
# Content_Type = data$`Content Type`,
# Track_ID = data$`Track Identifier`,
# Title = data$Title,
# Artist = data$Artist,
# Genre = data$Genre,
# Album=data$Album
# )
#
# play.activity<-read.csv("Apple Music Play Activity.csv")
# play.activity <- as.data.table(play.activity)
# play.activity.sel <- play.activity[, .(
# Device.App.Name,
# End.Reason.Type,
# Song.Name,
# Play.Duration.Milliseconds,
# Event.End.Timestamp,
# Event.Start.Timestamp
# )]
#
# # Keep every play except those that ended because playback was manually paused
# play.activity.filtered <- play.activity.sel[End.Reason.Type != "PLAYBACK_MANUALLY_PAUSED"]
#
# # Convert Event.Start.Timestamp to datetime
# # Assuming it's in standard format. Adjust format if needed
# play.activity.filtered[, Event.Start.Timestamp := as_datetime(Event.Start.Timestamp,
# format = "%Y-%m-%dT%H:%M:%OS%z",
# tz = "UTC")]
#
# # Join with df
# # First convert df to data.table if it isn't already
# setDT(df)
#
# # Perform the join
# merged_data <- play.activity.filtered[df, on = c("Song.Name" = "Title")]
#
# merged_data <- na.omit(
# merged_data,
# cols = c("End.Reason.Type", "Play.Duration.Milliseconds")
# )
#
# play.activity.music <- merged_data[Device.App.Name != "TVMusic"]
#
# write.csv(play.activity.music, "activitymusic.csv")
# Highcharts theme applied to every highcharter chart in this document:
# a six-colour palette, a system font stack, grey axis labels and a rounded,
# borderless tooltip.
apple_theme <- local({
  # The x and y axes are configured identically, so the settings are built once.
  axis_defaults <- list(
    labels = list(
      style = list(color = "#86868B", fontSize = "13px")
    ),
    lineColor = "#E5E5E5",
    gridLineColor = "#E5E5E5"
  )

  hc_theme(
    colors = c("#007AFF", "#FF2D55", "#5856D6", "#FF9500", "#4CD964", "#1D1D1F"),
    chart = list(
      backgroundColor = "#FFFFFF",
      style = list(
        fontFamily = "SF Pro Display, -apple-system, BlinkMacSystemFont, sans-serif"
      )
    ),
    title = list(
      style = list(fontSize = "24px", fontWeight = "bold", color = "#1D1D1F")
    ),
    subtitle = list(
      style = list(fontSize = "16px", color = "#86868B")
    ),
    legend = list(
      itemStyle = list(fontWeight = "normal", fontSize = "13px")
    ),
    xAxis = axis_defaults,
    yAxis = axis_defaults,
    plotOptions = list(
      series = list(borderRadius = 8, borderWidth = 0, shadow = FALSE)
    ),
    tooltip = list(
      backgroundColor = "rgba(255, 255, 255, 0.96)",
      borderWidth = 0,
      borderRadius = 12,
      shadow = TRUE,
      style = list(fontSize = "14px")
    )
  )
})
# Load the pre-processed play history (the file written by the commented-out
# preparation code above).
play.activity <- read.csv(file = "activitymusic.csv")
# daily <- read.csv("daily.csv")
```
## A Data-Driven Analysis of My Music Listening Habits
My listening data from Apple Music reveals extensive engagement: over 450 songs from approximately 240 albums were played more than 6,700 times, totaling over 240 hours of music. This averages to roughly 1.6 hours of daily listening, with each song played about 44 times.
This analysis, inspired by [Spotify Wrapped's](https://en.wikipedia.org/wiki/Spotify_Wrapped) popular year-end reviews, uses data directly exported from Apple Music to create detailed visualizations of my listening patterns.
```{r}
#| label: total
#| echo: false
#| warning: false
#| message: false
# Convert the play history to a data.table by reference.
setDT(play.activity)

# Headline KPIs over rows that have both an album and a play duration.
# 3.6e6 milliseconds = 1 hour.
kpis <- play.activity[
  !(is.na(Album) | is.na(Play.Duration.Milliseconds)),
  .(
    total_plays = .N,
    total_duration = round(sum(Play.Duration.Milliseconds) / 3.6e6, 1),
    total_songs = uniqueN(Song.Name),
    total_albums = uniqueN(Album)
  )
]

# The same metrics per week; each play is assigned to the week that
# floor_date() maps its start date to.
weekly_stats <- play.activity[
  !(is.na(Album) | is.na(Play.Duration.Milliseconds)),
  .(
    songs = uniqueN(Song.Name),
    plays = .N,
    duration = round(sum(Play.Duration.Milliseconds) / 3.6e6, 1)
  ),
  by = .(week = floor_date(as.Date(Event.Start.Timestamp), "week"))
]

# Mean of every weekly metric, rounded to one decimal place.
weekly_averages <- weekly_stats[
  ,
  lapply(.SD, function(metric) round(mean(metric), 1)),
  .SDcols = c("songs", "plays", "duration")
]
setnames(weekly_averages, c("avg_songs", "avg_plays", "avg_duration"))
```
```{r}
#| label: listening-journey
#| echo: false
#| warning: false
#| message: false
# Make sure the play history is a data.table (no-op if it already is).
setDT(play.activity)

# Number of plays per calendar month, keyed by a sortable "YYYY-MM" string and
# ordered chronologically.
monthly_listening <- play.activity[
  ,
  .(tracks = .N),
  by = .(month = format(as.Date(Event.Start.Timestamp), "%Y-%m"))
][order(month)]

# Axis label such as "Jan 2024", built from the first day of each month.
# `:=` must be written as a single token; `: =` does not parse.
monthly_listening[, month_name := format(as.Date(paste0(month, "-01")), "%b %Y")]
# Area chart of the monthly play counts held in `monthly_listening`.
highchart() |>
  hc_chart(type = "area", style = list(fontFamily = "SF Pro Display")) |>
  hc_title(
    text = "My Listening Journey is full of bumps",
    align = "left",
    style = list(fontSize = "24px")
  ) |>
  hc_subtitle(text = "Monthly track plays over time", align = "left") |>
  hc_xAxis(
    categories = monthly_listening$month_name,
    labels = list(style = list(fontSize = "12px"), rotation = -45),
    tickmarkPlacement = "on",
    title = list(text = "")
  ) |>
  hc_yAxis(
    title = list(text = "Tracks Played"),
    labels = list(style = list(fontSize = "12px")),
    gridLineColor = "#f8f9fa",
    gridLineWidth = 0.5
  ) |>
  hc_add_series(
    data = monthly_listening$tracks,
    name = "Tracks",
    # Top-to-bottom gradient: semi-opaque blue fading to fully transparent.
    fillColor = list(
      linearGradient = list(x1 = 0, y1 = 0, x2 = 0, y2 = 1),
      stops = list(
        list(0, "#007AFF80"),
        list(1, "#ffffff00")
      )
    ),
    color = "#007AFF",
    lineWidth = 2,
    # White-filled circles with a blue outline on every data point.
    marker = list(
      enabled = TRUE,
      radius = 4,
      symbol = "circle",
      fillColor = "#FFFFFF",
      lineWidth = 2,
      lineColor = "#007AFF"
    )
  ) |>
  hc_tooltip(
    backgroundColor = "rgba(255, 255, 255, 0.9)",
    borderWidth = 0,
    borderRadius = 8,
    shadow = FALSE,
    style = list(fontSize = "14px"),
    headerFormat = "<span style='font-size: 12px'>{point.x}</span><br/>",
    pointFormat = "<span style='color: #007AFF'>●</span> <b>{point.y}</b> tracks played<br/>"
  ) |>
  hc_plotOptions(
    area = list(
      fillOpacity = 0.5,
      lineWidth = 2,
      marker = list(
        enabled = FALSE,
        symbol = "circle",
        states = list(hover = list(enabled = TRUE))
      ),
      states = list(hover = list(lineWidth = 3))
    )
  ) |>
  hc_credits(
    enabled = TRUE,
    text = "Heteroscedasticity",
    href = "https://blog.asitavsen.com",
    style = list(fontSize = "12px")
  ) |>
  hc_exporting(enabled = TRUE, filename = "Listening_Journey") |>
  hc_add_theme(apple_theme)
```
My musical preferences reveal a clear pattern dominated by rock music, particularly the psychedelic and progressive rock sounds of Pink Floyd. Here's a snapshot of my listening habits:
::: column-margin
[Pink Floyd on Wikipedia](https://en.wikipedia.org/wiki/Pink_Floyd)
:::
The hierarchy emerges distinctly across three levels:
- Genre Level: *Rock* stands as the cornerstone of my music consumption
- Artist Level: *Pink Floyd* dominates my playlist with remarkable consistency
- Album Level: "*The Dark Side of the Moon*" reigns supreme among all albums
> This three-tiered preference isn't merely coincidental - it tells a coherent story of my musical taste anchored in rock's philosophical depth and *Pink Floyd's* masterful artistry, particularly exemplified by their magnum opus "*The Dark Side of the Moon.*" This pattern holds strong across different time periods, suggesting a deep-rooted appreciation rather than a temporary infatuation.
These preferences exhibit notable longevity and consistency:
- *Rock* maintains its dominance even when expanding to subgenres.
- *Pink Floyd's* leadership position remains unchallenged across years.
- "*The Dark Side of the Moon*" continues to resonate, maintaining top position in both 2023 and 2024.
This introductory overview sets the stage for a deeper analysis of each component, revealing interesting patterns and shifts in listening habits while maintaining these core preferences.
```{r}
#| label: pink-floyd-heatmap
#| echo: false
#| warning: false
#| message: false
# Count Pink Floyd plays for every (hour of day, weekday) combination.
# week_start = 7 pins the weekday factor to Sunday-first regardless of the
# `lubridate.week.start` option, so the 0-based index computed below always
# lines up with the Sunday-first day names hard-coded in the heatmap tooltip.
# NOTE(review): the timestamp is parsed without an explicit time zone --
# confirm the resulting hour is in the zone you intend to report.
pf_hours <- play.activity[
  Artist == "Pink Floyd",
  .(
    hour = hour(as.POSIXct(Event.Start.Timestamp)),
    weekday = wday(as.Date(Event.Start.Timestamp), label = TRUE, week_start = 7)
  )
][, .(plays = .N), by = .(hour, weekday)]

# Denominator for each cell's share of all Pink Floyd plays.
total_plays <- sum(pf_hours$plays)

# Heatmap points: x = hour, y = 0-based weekday index, value = play count.
heatmap_data <- pf_hours[, .(
  x = hour,
  y = as.numeric(weekday) - 1,
  value = plays,
  percentage = round(plays / total_plays * 100, 1)
)]
# Heatmap of Pink Floyd plays by hour of day (x) and weekday (y), drawn over a
# washed-out background photograph.
highchart() |>
  hc_chart(
    type = "heatmap",
    # Near-opaque white layer so the background image stays faint.
    backgroundColor = list(
      linearGradient = list(x1 = 0, y1 = 0, x2 = 0, y2 = 1),
      stops = list(
        list(0, "rgba(255, 255, 255, 0.92)"),
        list(1, "rgba(255, 255, 255, 0.92)")
      )
    ),
    style = list(
      backgroundImage = "url('https://upload.wikimedia.org/wikipedia/commons/b/bd/David_Gilmour_Rattle_That_Rock_World_Tour_Buenos_Aires_151219-6479-jikatu_%2823485842789%29.jpg')",
      backgroundSize = "cover",
      backgroundPosition = "center",
      backgroundBlendMode = "soft-light"
    )
  ) |>
  hc_title(
    text = "I listened to Pink Floyd mostly on Sundays",
    align = "left",
    style = list(
      fontSize = "24px",
      fontWeight = "bold",
      textShadow = "2px 2px 4px rgba(255, 255, 255, 0.8)"
    )
  ) |>
  hc_subtitle(
    # Fixed typo in the displayed text ("mornong" -> "morning").
    text = "Also on weekdays, early in the morning or afternoon",
    align = "left",
    useHTML = TRUE,
    style = list(
      fontSize = "16px",
      textShadow = "1px 1px 2px rgba(255, 255, 255, 0.8)"
    )
  ) |>
  hc_caption(text = "Listening patterns by hour and day<br><span style='font-size: 10px; color: #666;'>Background: David Gilmour performing in Buenos Aires, 2015 (CC BY-SA 2.0)</span>") |>
  hc_xAxis(
    title = list(
      text = "Hour of Day",
      style = list(fontSize = "14px")
    ),
    labels = list(
      format = "{value}:00",
      style = list(textShadow = "1px 1px 2px rgba(255, 255, 255, 0.8)")
    )
  ) |>
  hc_yAxis(
    # Category i corresponds to the 0-based weekday index used as `y`.
    categories = levels(pf_hours$weekday),
    title = list(text = ""),
    reversed = TRUE,
    labels = list(
      style = list(
        fontSize = "12px",
        textShadow = "1px 1px 2px rgba(255, 255, 255, 0.8)"
      )
    )
  ) |>
  hc_add_series(
    data = list_parse2(heatmap_data),
    borderWidth = 1,
    states = list(hover = list(brightness = 0.2))
  ) |>
  hc_colorAxis(
    stops = list(
      list(0, "#ffffff"),
      list(0.5, "#007AFF"),
      list(1, "#002AB3")
    ),
    min = 0,
    labels = list(
      style = list(textShadow = "1px 1px 2px rgba(255, 255, 255, 0.8)")
    )
  ) |>
  hc_tooltip(
    # Shows "<Day>, <h>:00 AM/PM" plus the play count; the day name is looked
    # up from the point's 0-based y index (Sunday first).
    formatter = JS("function() {
var dayNames = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];
var day = dayNames[this.point.y];
var hour = this.point.x;
var ampm = hour >= 12 ? 'PM' : 'AM';
hour = hour % 12;
hour = hour ? hour : 12;
return '<b>' + day + ', ' + hour + ':00 ' + ampm + '</b><br>' +
'Plays: <b>' + this.point.value + '</b><br>' ;
}")
  ) |>
  hc_legend(
    title = list(
      text = "Number of Plays",
      style = list(
        fontSize = "13px",
        textShadow = "1px 1px 2px rgba(255, 255, 255, 0.8)"
      )
    )
  ) |>
  hc_credits(
    enabled = TRUE,
    text = "Heteroscedasticity",
    href = "https://blog.asitavsen.com",
    style = list(fontSize = "12px")
  ) |>
  hc_add_theme(apple_theme)
```
- *Rock* dominates my listening habits, accounting for over 43% of total plays
- When combined with related subgenres (*Hard Rock* and *Alternative Rock*), the rock family represents more than 45% of all plays
- The genre hierarchy remained consistent across both 2023 and 2024, with *Rock* maintaining its top position
- Bollywood music consistently held the second spot in both years, showing stable listening preferences
::: column-margin
Rock music is a dynamic and influential genre that emerged in the 1950s, characterized by electric guitars, strong rhythms, and often powerful vocals. It represents rebellion, raw emotion, and social commentary, evolving through various subgenres from classic rock to alternative, while maintaining its core elements of energy, authenticity, and the ability to connect with audiences through both musical innovation and meaningful lyrics.
[Read More](https://en.wikipedia.org/wiki/Rock_music).
:::
::: panel-tabset
## All time
```{r}
#| label: genre-distribution-combined
#| echo: false
#| warning: false
#| message: false
#| layout-nrow: 1
# Plays per genre, most-played first, with each genre's share of all plays
# and the running (cumulative) share.
genre_dist <- play.activity[, .(plays = .N), by = Genre][order(-plays)]

# Functional form of data.table's `:=`; the backticked name must be exactly
# `:=` (no surrounding spaces) to be recognised.
genre_dist[, `:=`(
  percentage = plays / sum(plays) * 100,
  cumulative_percentage = cumsum(plays) / sum(plays) * 100
)]

# Subtitle text describing the most-played genre.
top_genre <- genre_dist[1]
top_genre_text <- sprintf(
  "%s accounts for %.1f%% of all plays",
  top_genre$Genre,
  top_genre$percentage
)
# Combined genre chart: play-count columns (left axis), cumulative-share line
# (right axis) and a small donut of the genre split.
highchart() |>
  hc_chart(
    type = "column",
    # Uniform translucent white layer above the background image.
    backgroundColor = list(
      linearGradient = list(x1 = 0, y1 = 0, x2 = 0, y2 = 1),
      stops = list(
        list(0, "rgba(255, 255, 255, 0.9)"),
        list(1, "rgba(255, 255, 255, 0.9)")
      )
    ),
    style = list(
      backgroundImage = "url('https://upload.wikimedia.org/wikipedia/commons/e/e3/Weathered_guitar_painting_%28Unsplash%29.jpg')",
      backgroundSize = "cover",
      backgroundRepeat = "no-repeat",
      backgroundPosition = "center"
    )
  ) |>
  hc_title(text = "I listened to rock mostly", align = "left") |>
  hc_subtitle(text = top_genre_text, align = "left", useHTML = TRUE) |>
  hc_caption(text = "<span style='font-size: 10px; color: #666;'>Background: Maite Tiscar maitetiscar, CC0, via Wikimedia Commons</span>") |>
  hc_xAxis(
    categories = genre_dist$Genre,
    labels = list(rotation = -45)
  ) |>
  hc_yAxis_multiples(
    # Axis 0: raw play counts.
    list(
      title = list(text = "Individual Plays"),
      labels = list(format = "{value}")
    ),
    # Axis 1: cumulative percentage, capped at 100 and drawn on the right.
    list(
      title = list(text = "Cumulative Percentage"),
      labels = list(format = "{value}%"),
      max = 100,
      opposite = TRUE
    )
  ) |>
  hc_add_series(
    name = "Individual Plays",
    data = genre_dist$plays,
    type = "column",
    color = "#007AFF80",
    yAxis = 0
  ) |>
  hc_add_series(
    name = "Cumulative %",
    data = genre_dist$cumulative_percentage,
    type = "line",
    color = "#FF2D55",
    yAxis = 1,
    lineWidth = 3,
    marker = list(enabled = TRUE, radius = 4)
  ) |>
  hc_add_series(
    type = "pie",
    name = "Genre Share",
    data = list_parse2(as.data.frame(genre_dist)),
    center = c("75%", "30%"),
    size = 120,
    innerSize = "60%",
    showInLegend = FALSE,
    dataLabels = list(enabled = FALSE),
    borderWidth = 0,
    borderRadius = 5
  ) |>
  hc_tooltip(shared = TRUE) |>
  hc_plotOptions(
    column = list(borderRadius = 3, borderWidth = 0),
    line = list(zIndex = 2),
    pie = list(
      zIndex = 3,
      colors = c("#007AFF", "#FF2D55", "#5856D6", "#FF9500", "#4CD964", "#1D1D1F")
    )
  ) |>
  hc_credits(
    enabled = TRUE,
    text = "Heteroscedasticity",
    href = "https://blog.asitavsen.com",
    style = list(fontSize = "12px")
  ) |>
  hc_exporting(enabled = TRUE, filename = "Genre_Distribution_Complete") |>
  hc_add_theme(apple_theme)
```
## 2023 vs 2024
```{r}
#| label: genre-change
#| echo: false
#| warning: false
#| message: false
# Top genres (at most 10) by play count for each year, ranked from 1.
# head() replaces `[1:10]`, which pads the table with all-NA rows when a year
# has fewer than ten genres; `:=` is written in its exact functional form.
top_2023 <- head(
  play.activity[
    year(as.Date(Event.Start.Timestamp)) == 2023,
    .(plays = .N),
    by = .(Genre)
  ][order(-plays)],
  10L
)[, `:=`(rank = seq_len(.N), year = 2023)]

top_2024 <- head(
  play.activity[
    year(as.Date(Event.Start.Timestamp)) == 2024,
    .(plays = .N),
    by = .(Genre)
  ][order(-plays)],
  10L
)[, `:=`(rank = seq_len(.N), year = 2024)]

# Every genre that made the top list in either year.
all_genres <- unique(c(top_2023$Genre, top_2024$Genre))

# Genres missing from one year's list get a placeholder row (0 plays, rank 11)
# so each genre has a row in both years. All columns are built with the same
# explicit length, so an empty set yields a zero-row table rather than relying
# on how data.table() recycles a zero-length column against scalars.
absent_2023 <- setdiff(all_genres, top_2023$Genre)
absent_2024 <- setdiff(all_genres, top_2024$Genre)

bump_data <- rbindlist(list(
  top_2023,
  data.table(
    Genre = absent_2023,
    plays = rep_len(0, length(absent_2023)),
    rank = rep_len(11, length(absent_2023)),
    year = rep_len(2023, length(absent_2023))
  ),
  top_2024,
  data.table(
    Genre = absent_2024,
    plays = rep_len(0, length(absent_2024)),
    rank = rep_len(11, length(absent_2024)),
    year = rep_len(2024, length(absent_2024))
  )
))

# Alternate the label's vertical offset by rank parity (even: +0.3, odd: -0.3).
bump_data[, label_offset := ifelse(rank %% 2 == 0, 0.3, -0.3)]

# One colour per genre. Up to 20 genres can appear (10 per year), so the base
# palette is cycled with rep_len() instead of being indexed past its end,
# which would produce NA colours.
genre_palette <- c(
  "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",
  "#FFFF33", "#A65628", "#F781BF", "#1B9E77", "#D95F02",
  "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D",
  "#666666"
)
distinct_colors <- rep_len(genre_palette, length(unique(bump_data$Genre)))
# Play-count labels for one year's points. The extra `text` aesthetic carries
# the genre and play count along with the layer for the plotly conversion.
genre_count_labels <- function(which_year, hjust, nudge_x) {
  geom_text(
    data = bump_data[year == which_year & rank <= 10],
    aes(
      label = plays,
      y = rank + label_offset,
      text = paste("Genre:", Genre, "<br>Plays:", plays)
    ),
    hjust = hjust,
    nudge_x = nudge_x,
    size = 3
  )
}

# Bump chart of genre ranks, 2023 vs 2024; placeholder rows (rank 11) are
# excluded from the plot.
p <- ggplot(bump_data[rank <= 10], aes(x = year, y = rank, color = Genre)) +
  # Light horizontal guide for each rank.
  geom_segment(
    data = data.frame(y = 1:10),
    aes(x = 2023, xend = 2024, y = y, yend = y),
    color = "grey90",
    linewidth = 0.5
  ) +
  geom_bump(linewidth = 2, smooth = 8) +
  geom_point(size = 5) +
  genre_count_labels(2023, hjust = -0.5, nudge_x = 0.1) +
  genre_count_labels(2024, hjust = 1.5, nudge_x = -0.1) +
  scale_y_reverse(breaks = 1:10) +
  scale_x_continuous(breaks = c(2023, 2024)) +
  scale_color_manual(values = distinct_colors) +
  labs(
    title = "Rock was too heavy to move",
    subtitle = "Top 10 Genres: 2023 vs 2024",
    x = "Year",
    y = "Rank",
    color = "Genres",
    caption = "Source: Heteroscedasticity | blog.asitavsen.com"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0),
    plot.subtitle = element_text(size = 12, hjust = 0),
    plot.caption = element_text(size = 8, color = "grey30", hjust = 1),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text = element_text(size = 10)
  ) +
  coord_cartesian(ylim = c(10.5, 0.5))

# Interactive version of the plot with a white hover label and fixed margins.
interactive_plot <- layout(
  ggplotly(p),
  hoverlabel = list(bgcolor = "white"),
  autosize = TRUE,
  margin = list(l = 50, r = 50, b = 50, t = 50)
)

# Render the widget without its legend.
hide_legend(interactive_plot)
```
:::
- *Pink Floyd* emerges as the dominant artist with an impressive 2,400+ plays, showcasing my strong preference for their music
- [*Farida Khanum*](https://en.wikipedia.org/wiki/Farida_Khanum), with over 700 plays, stands as the second most-played artist, though significantly behind Pink Floyd
- Year-over-year analysis shows interesting shifts in artist preferences:
    - *Pink Floyd* maintained top position in both 2023 and 2024
    - Only three artists (*Pink Floyd*, [*Kishore Kumar*](https://en.wikipedia.org/wiki/Kishore_Kumar), and [*Anjan Dutta*](https://en.wikipedia.org/wiki/Anjan_Dutt) & [*Somlata Acharyya Chowdhury*](https://en.wikipedia.org/wiki/Somlata_Acharyya_Chowdhury)) remained in the top 10 across both years
    - This indicates significant variation in listening patterns between years
::: column-margin
Pink Floyd is an iconic British rock band that revolutionized progressive rock through their philosophical lyrics, intricate compositions, and groundbreaking psychedelic soundscapes. Their masterpieces like "The Dark Side of the Moon" and "The Wall" are known for their conceptual depth, innovative studio techniques, and haunting explorations of themes like alienation, mental health, and societal pressure, making them one of the most influential and commercially successful bands in music history. [ Read More ](https://en.wikipedia.org/wiki/Pink_Floyd)
:::
::: panel-tabset
## All Time
```{r}
#| label: artist-distribution-combined
#| echo: false
#| warning: false
#| message: false
# Plays per artist, most-played first. Shares are computed against ALL plays
# before the table is cut down to the top artists.
artist_dist <- play.activity[, .(plays = .N), by = Artist][order(-plays)]

# Functional form of data.table's `:=`; the backticked name must be exactly
# `:=` (no surrounding spaces) to be recognised.
artist_dist[, `:=`(
  percentage = plays / sum(plays) * 100,
  cumulative_percentage = cumsum(plays) / sum(plays) * 100
)]

# Keep at most the ten most-played artists. head() does not pad with NA rows
# when fewer than ten artists exist, unlike `[1:10]`.
artist_dist <- head(artist_dist, 10L)

# Subtitle text describing the most-played artist.
top_artist <- artist_dist[1]
top_artist_text <- sprintf(
  "%s accounts for %.1f%% of all plays",
  top_artist$Artist,
  top_artist$percentage
)
# Combined artist chart: play-count columns (left axis), cumulative-share line
# (right axis) and a small donut of the top artists' split.
highchart() |>
  hc_chart(
    type = "column",
    # Translucent white layer so the background photo shows through softly.
    backgroundColor = list(
      linearGradient = list(x1 = 0, y1 = 0, x2 = 0, y2 = 1),
      stops = list(
        list(0, "rgba(255, 255, 255, 0.75)"),
        list(1, "rgba(255, 255, 255, 0.75)")
      )
    ),
    style = list(
      backgroundImage = "url('https://upload.wikimedia.org/wikipedia/commons/f/f4/Battersea_Power_Station%2C_return_of_the_Pink_Floyd_pig_-_geograph.org.uk_-_2618691.jpg')",
      backgroundPosition = "center",
      backgroundBlendMode = "soft-light"
    )
  ) |>
  hc_title(
    text = "I listen to Pink Floyd a Lot!",
    align = "left",
    style = list(textShadow = "2px 2px 4px rgba(255, 255, 255, 0.8)")
  ) |>
  hc_subtitle(
    text = top_artist_text,
    align = "left",
    useHTML = TRUE,
    style = list(textShadow = "1px 1px 2px rgba(255, 255, 255, 0.8)")
  ) |>
  hc_caption(text = "<br><span style='font-size: 10px; color: #666;'>Background: Battersea Power Station: return of the Pink Floyd pig by Christopher Hilton, CC BY-SA 2.0 <https://creativecommons.org/licenses/by-sa/2.0>, via Wikimedia Commons</span>") |>
  hc_add_series(
    name = "Individual Plays",
    data = artist_dist$plays,
    type = "column",
    color = "#007AFF80",
    yAxis = 0
  ) |>
  hc_add_series(
    name = "Cumulative %",
    data = artist_dist$cumulative_percentage,
    type = "line",
    color = "#FF2D55",
    yAxis = 1,
    lineWidth = 3,
    marker = list(enabled = TRUE, radius = 4)
  ) |>
  hc_add_series(
    type = "pie",
    name = "Artist Share",
    data = list_parse2(as.data.frame(artist_dist)),
    center = c("75%", "30%"),
    size = 120,
    innerSize = "60%",
    showInLegend = FALSE,
    dataLabels = list(enabled = FALSE),
    borderWidth = 0,
    borderRadius = 5
  ) |>
  hc_xAxis(
    categories = artist_dist$Artist,
    labels = list(
      rotation = -45,
      style = list(textShadow = "1px 1px 2px rgba(255, 255, 255, 0.8)")
    )
  ) |>
  hc_yAxis_multiples(
    # Axis 0: raw play counts.
    list(
      title = list(text = "Individual Plays"),
      labels = list(format = "{value}")
    ),
    # Axis 1: cumulative percentage, capped at 100 and drawn on the right.
    list(
      title = list(text = "Cumulative Percentage"),
      labels = list(format = "{value}%"),
      max = 100,
      opposite = TRUE
    )
  ) |>
  hc_tooltip(shared = TRUE) |>
  hc_plotOptions(
    column = list(borderRadius = 3, borderWidth = 0),
    line = list(zIndex = 2),
    pie = list(
      zIndex = 3,
      colors = c("#007AFF", "#FF2D55", "#5856D6", "#FF9500", "#4CD964", "#1D1D1F")
    )
  ) |>
  hc_credits(
    enabled = TRUE,
    text = "Heteroscedasticity",
    href = "https://blog.asitavsen.com",
    style = list(fontSize = "12px")
  ) |>
  hc_exporting(enabled = TRUE, filename = "Artist_Distribution_Complete") |>
  hc_add_theme(apple_theme)
```
## 2023 vs 2024
```{r}
#| label: artist-change
#| echo: false
#| warning: false
#| message: false
# Top artists (at most 10) by play count for each year, ranked from 1.
# head() replaces `[1:10]`, which pads the table with all-NA rows when a year
# has fewer than ten artists; `:=` is written as a single token (`: =` does
# not parse).
top_2023 <- head(
  play.activity[
    year(as.Date(Event.Start.Timestamp)) == 2023,
    .(plays = .N),
    by = .(Artist)
  ][order(-plays)],
  10L
)[, rank := seq_len(.N)][, year := 2023]

top_2024 <- head(
  play.activity[
    year(as.Date(Event.Start.Timestamp)) == 2024,
    .(plays = .N),
    by = .(Artist)
  ][order(-plays)],
  10L
)[, rank := seq_len(.N)][, year := 2024]

# Every artist that made the top list in either year.
all_artists <- unique(c(top_2023$Artist, top_2024$Artist))

# Artists missing from one year's list get a placeholder row (0 plays,
# rank 11) so each artist has a row in both years. All columns are built with
# the same explicit length, so an empty set yields a zero-row table rather
# than relying on how data.table() recycles a zero-length column.
absent_2023 <- setdiff(all_artists, top_2023$Artist)
absent_2024 <- setdiff(all_artists, top_2024$Artist)

bump_data <- rbindlist(list(
  top_2023,
  data.table(
    Artist = absent_2023,
    plays = rep_len(0, length(absent_2023)),
    rank = rep_len(11, length(absent_2023)),
    year = rep_len(2023, length(absent_2023))
  ),
  top_2024,
  data.table(
    Artist = absent_2024,
    plays = rep_len(0, length(absent_2024)),
    rank = rep_len(11, length(absent_2024)),
    year = rep_len(2024, length(absent_2024))
  )
))

# Alternate the label's vertical offset by rank parity (even: +0.3, odd: -0.3).
bump_data[, label_offset := ifelse(rank %% 2 == 0, 0.3, -0.3)]

# One colour per artist: 16 distinct colours, cycled from the start when more
# are needed (for up to 20 artists this equals the original 20-entry list).
artist_palette <- c(
  "#E41A1C", # red
  "#377EB8", # blue
  "#4DAF4A", # green
  "#984EA3", # purple
  "#FF7F00", # orange
  "#FFFF33", # yellow
  "#A65628", # brown
  "#F781BF", # pink
  "#1B9E77", # sea green
  "#D95F02", # dark orange
  "#7570B3", # slate blue
  "#E7298A", # magenta
  "#66A61E", # lime green
  "#E6AB02", # gold
  "#A6761D", # dark brown
  "#666666"  # dark grey
)
distinct_colors <- rep_len(artist_palette, length(unique(bump_data$Artist)))
# Play-count labels for one year's points. The extra `text` aesthetic carries
# the artist and play count along with the layer for the plotly conversion.
artist_count_labels <- function(which_year, hjust, nudge_x) {
  geom_text(
    data = bump_data[year == which_year & rank <= 10],
    aes(
      label = plays,
      y = rank + label_offset,
      text = paste("Artist:", Artist, "<br>Plays:", plays)
    ),
    hjust = hjust,
    nudge_x = nudge_x,
    size = 3
  )
}

# Bump chart of artist ranks, 2023 vs 2024; placeholder rows (rank 11) are
# excluded from the plot.
p <- ggplot(bump_data[rank <= 10], aes(x = year, y = rank, color = Artist)) +
  # Light horizontal guide for each rank.
  geom_segment(
    data = data.frame(y = 1:10),
    aes(x = 2023, xend = 2024, y = y, yend = y),
    color = "grey90",
    linewidth = 0.5
  ) +
  geom_bump(linewidth = 2, smooth = 8) +
  geom_point(size = 5) +
  artist_count_labels(2023, hjust = -0.5, nudge_x = 0.1) +
  artist_count_labels(2024, hjust = 1.5, nudge_x = -0.1) +
  scale_y_reverse(breaks = 1:10) +
  scale_x_continuous(breaks = c(2023, 2024)) +
  scale_color_manual(values = distinct_colors) +
  labs(
    title = "Everything changed other than Pink Floyd",
    subtitle = "Top 10 Artists: 2023 vs 2024",
    x = "Year",
    y = "Rank",
    color = "Artists",
    caption = "Source: Heteroscedasticity | blog.asitavsen.com"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0),
    plot.subtitle = element_text(size = 12, hjust = 0),
    plot.caption = element_text(size = 8, color = "grey30", hjust = 1),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text = element_text(size = 10)
  ) +
  coord_cartesian(ylim = c(10.5, 0.5))

# Interactive version of the plot with a white hover label and fixed margins.
interactive_plot <- layout(
  ggplotly(p),
  hoverlabel = list(bgcolor = "white"),
  autosize = TRUE,
  margin = list(l = 50, r = 50, b = 50, t = 50)
)

# Render the widget without its legend.
hide_legend(interactive_plot)

# Optional exports, left disabled:
# ggsave("top_artists_rankings.png",
#        plot = p,
#        width = 12,
#        height = 8,
#        dpi = 300,
#        bg = "white")
# htmlwidgets::saveWidget(interactive_plot, "top_artists_rankings_interactive.html")
```
:::
- "*The Dark Side of the Moon*" stands as the most played album with 1,240 plays
- "*The Wall*" follows as the second most popular album
- Temporal analysis reveals:
    - "*The Dark Side of the Moon*" maintained its #1 position in both 2023 and 2024
    - Only three albums (all by *Pink Floyd*) remained in the top 10 across both years
    - This demonstrates both consistency in *Pink Floyd* preference and variation in other listening choices
::: column-margin
"The Dark Side of the Moon" (1973) is Pink Floyd's masterpiece album that explores themes of mental health, time, greed, and human nature through seamlessly connected tracks and innovative sound production. With over 45 million copies sold, it remains one of music's most influential works, known for its immersive sonic experience and profound social commentary. [ Read More ](https://en.wikipedia.org/wiki/The_Dark_Side_of_the_Moon)
:::
::: panel-tabset
## All Time
```{r}
#| label: pink-floyd-top-album
#| echo: false
#| warning: false
#| message: false
# Per-album statistics for the most-played albums (at most 10):
#   plays          - number of plays
#   avg_duration   - mean play duration in seconds
#   total_duration - summed play duration in hours
# Rows without an album are dropped. head() replaces `[1:10, ]`, which pads
# the table with all-NA rows when fewer than ten albums exist.
album_stats <- head(
  play.activity[
    ,
    .(
      plays = .N,
      avg_duration = mean(Play.Duration.Milliseconds, na.rm = TRUE) / 1000,
      total_duration = sum(Play.Duration.Milliseconds, na.rm = TRUE) / (1000 * 60 * 60)
    ),
    by = .(Album)
  ][order(-plays)][!is.na(Album)],
  10L
)
# Album chart: play-count columns on the left axis and an area-spline of the
# average play duration (minutes) on the right axis. An album-cover image is
# drawn onto the chart when it loads.
highchart() |>
  hc_chart(
    type = "column",
    style = list(fontFamily = "SF Pro Display"),
    marginBottom = 100,
    events = list(
      # Adds a 120x120 image 250px in from the chart's right edge.
      load = JS("function() {
var chart = this;
chart.renderer.image(
'https://upload.wikimedia.org/wikipedia/en/3/3b/Dark_Side_of_the_Moon.png',
chart.chartWidth - 250, // x position
40, // y position
120, // width
120 // height
)
.attr({
zIndex: 3
})
.css({
opacity: 0.9,
border: '2px solid white',
borderRadius: '5px',
boxShadow: '3px 3px 10px rgba(0,0,0,0.3)'
})
.add();
}")
    )
  ) |>
  hc_title(
    text = "Dark side of the moon is the top album",
    align = "left",
    style = list(fontSize = "24px")
  ) |>
  hc_subtitle(
    text = "By number of plays and average song duration",
    align = "left"
  ) |>
  hc_xAxis(
    categories = album_stats$Album,
    labels = list(style = list(fontSize = "12px"), rotation = -45),
    title = list(text = "")
  ) |>
  hc_yAxis_multiples(
    # Axis 0 (blue): number of plays.
    list(
      title = list(text = "Number of Plays", style = list(color = "#007AFF")),
      labels = list(style = list(color = "#007AFF")),
      gridLineWidth = 0.5
    ),
    # Axis 1 (red, right-hand side): average duration.
    list(
      title = list(
        text = "Average Duration (minutes)",
        style = list(color = "#FF2D55")
      ),
      labels = list(style = list(color = "#FF2D55")),
      opposite = TRUE
    )
  ) |>
  hc_add_series(
    data = album_stats$plays,
    name = "Total Plays",
    color = "#007AFF",
    type = "column"
  ) |>
  hc_add_series(
    # avg_duration is in seconds; convert to minutes for display.
    data = round(album_stats$avg_duration / 60, 1),
    name = "Avg Duration",
    color = "#FF2D55",
    type = "areaspline",
    yAxis = 1,
    fillOpacity = 0.3,
    marker = list(
      enabled = TRUE,
      radius = 4,
      fillColor = "#FF2D55",
      lineWidth = 2,
      lineColor = "#FFFFFF"
    )
  ) |>
  hc_caption(text = "<br><span style='font-size: 10px; color: #666;'>Background: Dark_Side_of_the_Moon.png, Fair Use <https://en.wikipedia.org/wiki/Fair_use>, via Wikimedia Commons</span>") |>
  hc_tooltip(
    shared = TRUE,
    headerFormat = "<b>{point.key}</b><br>",
    # Appends the unit "minutes" for the duration series only.
    pointFormatter = JS("function() {
if (this.series.name === 'Avg Duration') {
return this.series.name + ': <b>' + this.y + ' minutes</b><br>';
} else {
return this.series.name + ': <b>' + this.y + '</b><br>';
}
}")
  ) |>
  hc_plotOptions(
    column = list(
      borderRadius = 3,
      borderWidth = 0,
      color = list(
        linearGradient = list(x1 = 0, y1 = 0, x2 = 0, y2 = 1),
        stops = list(
          list(0, "#007AFF"),
          list(1, "#007AFF80")
        )
      )
    ),
    areaspline = list(
      fillColor = list(
        linearGradient = list(x1 = 0, x2 = 0, y1 = 0, y2 = 1),
        stops = list(
          list(0, "#FF2D55"),
          list(1, "#FF2D5500")
        )
      ),
      lineWidth = 2,
      states = list(hover = list(lineWidth = 3))
    ),
    series = list(animation = list(duration = 1000))
  ) |>
  hc_legend(
    align = "center",
    verticalAlign = "bottom",
    layout = "horizontal",
    backgroundColor = "rgba(255, 255, 255, 0.9)",
    borderRadius = 5,
    padding = 12
  ) |>
  hc_credits(
    enabled = TRUE,
    text = "Heteroscedasticity",
    href = "https://blog.asitavsen.com",
    style = list(fontSize = "12px")
  ) |>
  hc_exporting(enabled = TRUE, filename = "Pink_Floyd_Albums") |>
  hc_add_theme(apple_theme)
```
## 2023 vs 2024
```{r}
#| label: album-change
#| echo: false
#| warning: false
#| message: false
# Plays per album and year (2023 and 2024 only), ranked within each year and
# cut to each year's top 10. `:=` is written as a single token (`: =` does not
# parse).
album_rankings <- play.activity[
  year(as.Date(Event.Start.Timestamp)) %in% c(2023, 2024),
  .(plays = .N),
  by = .(Album, year = year(as.Date(Event.Start.Timestamp)))
][
  !is.na(Album)
][
  # Rank within each year; ties are broken by order of appearance.
  , rank := rank(-plays, ties.method = "first"),
  by = year
][
  rank <= 10
][
  order(year, rank)
]

# Every album that made the top 10 in either year.
all_albums <- unique(c(
  album_rankings[year == 2023, Album],
  album_rankings[year == 2024, Album]
))

# Albums missing from one year's list get a placeholder row (0 plays, rank 11)
# so each album has a row in both years. All columns are built with the same
# explicit length, so an empty set yields a zero-row table rather than relying
# on how data.table() recycles a zero-length column against scalars.
absent_2023 <- setdiff(all_albums, album_rankings[year == 2023, Album])
absent_2024 <- setdiff(all_albums, album_rankings[year == 2024, Album])

bump_data <- rbindlist(list(
  album_rankings[year == 2023],
  data.table(
    Album = absent_2023,
    plays = rep_len(0, length(absent_2023)),
    rank = rep_len(11, length(absent_2023)),
    year = rep_len(2023, length(absent_2023))
  ),
  album_rankings[year == 2024],
  data.table(
    Album = absent_2024,
    plays = rep_len(0, length(absent_2024)),
    rank = rep_len(11, length(absent_2024)),
    year = rep_len(2024, length(absent_2024))
  )
))

# Alternate the label's vertical offset by rank parity (even: +0.3, odd: -0.3).
bump_data[, label_offset := ifelse(rank %% 2 == 0, 0.3, -0.3)]

# One colour per album. Up to 20 albums can appear (10 per year), which is
# more than the original 15-entry palette covered; indexing past its end gave
# NA colours. The palette now matches the artist chart (16 distinct colours)
# and is cycled with rep_len() when more are needed.
album_palette <- c(
  "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",
  "#FFFF33", "#A65628", "#F781BF", "#1B9E77", "#D95F02",
  "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D",
  "#666666"
)
distinct_colors <- rep_len(album_palette, length(unique(bump_data$Album)))
# Play-count labels for one year's points. The extra `text` aesthetic carries
# the album and play count along with the layer for the plotly conversion.
album_count_labels <- function(which_year, hjust, nudge_x) {
  geom_text(
    data = bump_data[year == which_year & rank <= 10],
    aes(
      label = plays,
      y = rank + label_offset,
      text = paste("Album:", Album, "<br>Plays:", plays)
    ),
    hjust = hjust,
    nudge_x = nudge_x,
    size = 3
  )
}

# Bump chart of album ranks, 2023 vs 2024; placeholder rows (rank 11) are
# excluded from the plot.
p <- ggplot(bump_data[rank <= 10], aes(x = year, y = rank, color = Album)) +
  # Light horizontal guide for each rank.
  geom_segment(
    data = data.frame(y = 1:10),
    aes(x = 2023, xend = 2024, y = y, yend = y),
    color = "grey90",
    linewidth = 0.5
  ) +
  geom_bump(linewidth = 2, smooth = 8) +
  geom_point(size = 5) +
  album_count_labels(2023, hjust = -0.5, nudge_x = 0.1) +
  album_count_labels(2024, hjust = 1.5, nudge_x = -0.1) +
  scale_y_reverse(breaks = 1:10) +
  scale_x_continuous(breaks = c(2023, 2024)) +
  scale_color_manual(values = distinct_colors) +
  labs(
    title = "Dark side of the moon retained its position",
    subtitle = "Top 10 Albums: 2023 vs 2024",
    x = "Year",
    y = "Rank",
    color = "Albums",
    caption = "Source: Heteroscedasticity | blog.asitavsen.com"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0),
    plot.subtitle = element_text(size = 12, hjust = 0),
    plot.caption = element_text(size = 8, color = "grey30", hjust = 1),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text = element_text(size = 10)
  ) +
  coord_cartesian(ylim = c(10.5, 0.5))

# Interactive version of the plot with a white hover label and fixed margins.
interactive_plot <- layout(
  ggplotly(p),
  hoverlabel = list(bgcolor = "white"),
  autosize = TRUE,
  margin = list(l = 50, r = 50, b = 50, t = 50)
)

# Render the widget without its legend.
hide_legend(interactive_plot)
```
:::
- "*Breathe (In the Air)*" tops the overall play count among *Pink Floyd* songs
- Interesting year-wise variations:
    - 2023: "*Comfortably Numb*" led the charts
    - 2024: "*Time*" became the most played track
    - This shows evolving preferences even within the same artist's catalog
::: column-margin
"Time" is a powerful track from Pink Floyd's "Dark Side of the Moon" that opens with a dramatic sequence of chiming clocks before launching into a meditation on life's swift passage and wasted opportunities. The song warns about the dangers of complacency and letting life slip away, highlighted by David Gilmour's iconic guitar solos and Roger Waters' introspective lyrics about how people suddenly realize they're "shorter of breath and one day closer to death."
[Read More](https://en.wikipedia.org/wiki/Time_(Pink_Floyd_song))
:::
::: panel-tabset
## All Time
```{r}
#| label: pink-floyd-top
#| echo: false
#| warning: false
#| message: false
# Most-played Pink Floyd songs (at most 10) with their play count and mean
# play duration in seconds. head() replaces `[1:10]`, which pads the table
# with all-NA rows when fewer than ten songs exist.
top_songs <- head(
  play.activity[
    Artist == "Pink Floyd",
    .(
      plays = .N,
      avg_duration = mean(Play.Duration.Milliseconds, na.rm = TRUE) / 1000
    ),
    by = .(Song.Name)
  ][order(-plays)],
  10L
)
# Create enhanced column chart for top 10 songs
highchart () |>
hc_chart (
type = "column" ,
style = list (fontFamily = "SF Pro Display" ),
marginBottom = 100 # Increased bottom margin for legend
) |>
hc_title (
text = "Top 10 Most Played Pink Floyd Songs" ,
align = "left" ,
style = list (fontSize = "24px" )
) |>
hc_subtitle (
text = "By number of plays and average duration" ,
align = "left"
) |>
hc_xAxis (
categories = top_songs$ Song.Name,
labels = list (
style = list (fontSize = "12px" ),
rotation = - 45
),
title = list (text = "" )
) |>
hc_yAxis_multiples (
list (
title = list (
text = "Number of Plays" ,
style = list (color = "#007AFF" )
),
labels = list (style = list (color = "#007AFF" )),
gridLineWidth = 0.5
),
list (
title = list (
text = "Duration (seconds)" ,
style = list (color = "#FF2D55" )
),
labels = list (style = list (color = "#FF2D55" )),
opposite = TRUE
)
) |>
hc_add_series (
data = top_songs$ plays,
name = "Plays" ,
color = "#007AFF" ,
type = "column"
) |>
hc_add_series (
data = top_songs$ avg_duration,
name = "Duration" ,
color = "#FF2D55" ,
type = "areaspline" ,
yAxis = 1 ,
fillOpacity = 0.3 ,
marker = list (
enabled = TRUE ,
radius = 4 ,
fillColor = "#FF2D55" ,
lineWidth = 2 ,
lineColor = "#FFFFFF"
)
) |>
hc_tooltip (
shared = TRUE ,
headerFormat = "<b>{point.key}</b><br>" ,
pointFormatter = JS ("function() {
if (this.series.name === 'Duration') {
return this.series.name + ': <b>' + Math.round(this.y) + ' seconds</b><br>';
} else {
return this.series.name + ': <b>' + this.y + ' plays</b><br>';
}
}" )
) |>
hc_plotOptions (
column = list (
borderRadius = 3 ,
borderWidth = 0 ,
color = list (
linearGradient = list (x1 = 0 , y1 = 0 , x2 = 0 , y2 = 1 ),
stops = list (
list (0 , "#007AFF" ),
list (1 , "#007AFF80" )
)
)
),
areaspline = list (
fillColor = list (
linearGradient = list (x1 = 0 , x2 = 0 , y1 = 0 , y2 = 1 ),
stops = list (
list (0 , "#FF2D55" ),
list (1 , "#FF2D5500" )
)
),
lineWidth = 2 ,
states = list (
hover = list (
lineWidth = 3
)
)
),
series = list (
animation = list (
duration = 1000
)
)
) |>
hc_legend (
align = "center" ,
verticalAlign = "bottom" ,
layout = "horizontal" ,
backgroundColor = "rgba(255, 255, 255, 0.9)" ,
borderRadius = 5 ,
padding = 12
) |>
hc_credits (
enabled = TRUE ,
text = "Heteroscedasticity" ,
href = "https://blog.asitavsen.com" ,
style = list (fontSize = "12px" )
) |>
hc_exporting (
enabled = TRUE ,
filename = "Top_10_Songs"
) |>
hc_add_theme (apple_theme)
```
## 2023 vs 2024
```{r}
#| label: pink-floyd-top-change
#| echo: false
#| warning: false
#| message: false
# Yearly top-10 rankings of Pink Floyd songs by play count.
# head() caps the table at ten rows without padding it with NA rows when a
# year has fewer than ten songs, and seq_len(.N) stays valid for an empty
# table (1:.N would yield c(1, 0)).
top_2023 <- play.activity[
  year(as.Date(Event.Start.Timestamp)) == 2023 &
    Artist == "Pink Floyd",
  .(plays = .N),
  by = .(Song.Name)
][order(-plays)]
top_2023 <- head(top_2023, 10)[, rank := seq_len(.N)][, year := 2023]

top_2024 <- play.activity[
  year(as.Date(Event.Start.Timestamp)) == 2024 &
    Artist == "Pink Floyd",
  .(plays = .N),
  by = .(Song.Name)
][order(-plays)]
top_2024 <- head(top_2024, 10)[, rank := seq_len(.N)][, year := 2024]

# Every song that made the top 10 in either year
all_songs <- unique(c(top_2023$Song.Name, top_2024$Song.Name))

# Give each song a row in both years: a song missing from a year's top 10
# gets a placeholder row (plays = 0, rank = 11). The placeholders fall outside
# the `rank <= 10` filters applied below, so they are never drawn.
bump_data <- rbindlist(list(
  top_2023,
  data.table(
    Song.Name = setdiff(all_songs, top_2023$Song.Name),
    plays = 0,
    rank = 11,
    year = 2023
  ),
  top_2024,
  data.table(
    Song.Name = setdiff(all_songs, top_2024$Song.Name),
    plays = 0,
    rank = 11,
    year = 2024
  )
))

# Alternate the label offset by rank parity (even ranks +0.3, odd ranks -0.3)
# so labels of adjacent ranks do not sit on the same line
bump_data[, label_offset := ifelse(rank %% 2 == 0, 0.3, -0.3)]

# Bump chart of rank per year, one coloured line per song
p <- ggplot(bump_data[rank <= 10], aes(x = year, y = rank, color = Song.Name)) +
  # Light horizontal guide line for each rank, replacing the removed grid
  geom_segment(
    data = data.frame(y = 1:10),
    aes(x = 2023, xend = 2024, y = y, yend = y),
    color = "grey90", linewidth = 0.5
  ) +
  # Thick, smoothed rank-transition lines with a point at each year
  geom_bump(linewidth = 2, smooth = 8) +
  geom_point(size = 5) +
  # 2023 labels: play counts, pushed to the right of the points
  geom_text(
    data = bump_data[year == 2023 & rank <= 10],
    aes(
      label = plays,
      y = rank + label_offset
    ),
    hjust = -0.5,
    nudge_x = 0.1,
    size = 3
  ) +
  # 2024 labels: play counts, pushed to the left of the points
  geom_text(
    data = bump_data[year == 2024 & rank <= 10],
    aes(
      label = plays,
      y = rank + label_offset
    ),
    hjust = 1.5,
    nudge_x = -0.1,
    size = 3
  ) +
  # Rank 1 at the top
  scale_y_reverse(breaks = 1:10) +
  scale_x_continuous(breaks = c(2023, 2024)) +
  scale_color_brewer(palette = "Set3") +
  labs(
    title = "Comfortably Numb gave way to Time",
    subtitle = "Top 10 Pink Floyd Songs: 2023 vs 2024",
    x = "Year",
    y = "Rank",
    color = "Songs",
    caption = "Source: Heteroscedasticity | blog.asitavsen.com"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0),
    plot.subtitle = element_text(size = 12, hjust = 0),
    plot.caption = element_text(size = 8, color = "grey30", hjust = 1),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text = element_text(size = 10)
  ) +
  # Limits are given in reversed order to match the reversed rank axis
  coord_cartesian(ylim = c(10.5, 0.5))

# Make it interactive with plotly, restricting the hover text to the song,
# its rank and its play count
fig <- ggplotly(p, tooltip = c("Song.Name", "rank", "plays")) |>
  layout(hoverlabel = list(bgcolor = "white"))

# Display the plot without its legend
hide_legend(fig)
```
:::