The hexbin mark creates hexagonal binning visualizations. It is well suited
to showing density in large scatter plots, where individual points would
otherwise overlap and hide the underlying distribution (overplotting).
Customization Options
Custom Color Scheme
# Custom colour scheme example: 2000 points whose x and y coordinates are
# independent standard-normal draws (seed fixed so the figure is reproducible).
set.seed(456)
n <- 2000
bivar_data <- data.frame(x = rnorm(n), y = rnorm(n))

# Assemble the spec step by step: start with the plot-level attributes, then
# attach the single hexbin layer.
spec_colored <- list(plot = list(), colorScheme = "viridis")
spec_colored$plot[[1]] <- list(
  mark = "hexbin",
  data = list(from = "bivar_data"),
  x = "x",
  y = "y",
  # A named NULL entry; presumably serialised as Mosaic's count aggregate so
  # that each hexagon is filled by the number of points in it (confirm).
  fill = list(count = NULL),
  fillOpacity = 0.8
)

# The data frame is passed under the same name the spec refers to in `from`.
mosaic(spec_colored, bivar_data = bivar_data)
Multiple Distributions
# Multiple distributions example: three Gaussian clusters of equal size
# (seed fixed so the figure is reproducible).
set.seed(789)
n_per_cluster <- 500

cluster_data <- local({
  # Centre and spread of each cluster along each axis (clusters 1, 2, 3).
  centre_x <- c(20, 60, 40)
  spread_x <- c(5, 8, 6)
  centre_y <- c(30, 70, 20)
  spread_y <- c(7, 5, 4)

  # Cluster membership of every row: 1,1,...,2,2,...,3,3,...
  member <- rep(seq_along(centre_x), each = n_per_cluster)
  n_total <- length(member)

  # All x values are drawn before any y value, and within each axis the
  # clusters are drawn in order 1, 2, 3, so the random stream is consumed
  # exactly as if each cluster had been sampled with its own rnorm() call.
  xs <- rnorm(n_total, mean = centre_x[member], sd = spread_x[member])
  ys <- rnorm(n_total, mean = centre_y[member], sd = spread_y[member])

  data.frame(x = xs, y = ys)
})

# Assemble the spec step by step: plot-level attributes first, then the
# single hexbin layer.
spec_clusters <- list(plot = list(), colorScheme = "plasma")
spec_clusters$plot[[1]] <- list(
  mark = "hexbin",
  data = list(from = "cluster_data"),
  x = "x",
  y = "y",
  # A named NULL entry; presumably serialised as Mosaic's count aggregate
  # (confirm against the spec format).
  fill = list(count = NULL)
)

# The data frame is passed under the same name the spec refers to in `from`.
mosaic(spec_clusters, cluster_data = cluster_data)
Real-world Example
# Real-world style example: a larger simulated dataset of two correlated
# measurements (seed fixed so the figure is reproducible).
set.seed(999)
n <- 5000

# measurement_b is a linear function of measurement_a plus Gaussian noise,
# which produces the correlation between the two variables.
x_vals <- rnorm(n, mean = 100, sd = 20)
y_vals <- x_vals * 0.8 + rnorm(n, mean = 0, sd = 15)
realworld_data <- data.frame(measurement_a = x_vals, measurement_b = y_vals)

# Assemble the spec step by step; attributes are added in the same order as
# they appear in the final spec (plot, xLabel, yLabel, colorScheme).
spec_realworld <- list(plot = list())
spec_realworld$plot[[1]] <- list(
  mark = "hexbin",
  data = list(from = "realworld_data"),
  x = "measurement_a",
  y = "measurement_b",
  # A named NULL entry; presumably serialised as Mosaic's count aggregate
  # (confirm against the spec format).
  fill = list(count = NULL)
)
spec_realworld$xLabel <- "Measurement A"
spec_realworld$yLabel <- "Measurement B"
spec_realworld$colorScheme <- "turbo"

# The data frame is passed under the same name the spec refers to in `from`.
mosaic(spec_realworld, realworld_data = realworld_data)
Use Cases
- Large Datasets: Essential for visualizing datasets with thousands or millions of points
- Density Visualization: Shows where data points are most concentrated
- Overplotting Solution: Replaces scatter plots when points overlap too much
- Pattern Detection: Reveals patterns that might be hidden in dense scatter plots
- Statistical Analysis: Useful for exploring bivariate distributions and correlations