# One-time installation of ggplot2 if it is missing (uncomment to run)
# install.packages("ggplot2")
# Attach ggplot2 so ggplot(), aes(), geom_*() etc. can be called unprefixed
library(ggplot2)
# Load the built-in mtcars dataset, which the examples in this file plot
data(mtcars)
Introduction to ggplot2 Graphics
The `ggplot2` package, part of the tidyverse, implements the Grammar of Graphics to create elegant and complex plots with a consistent syntax. It’s one of the most popular visualization packages in R.
Getting Started with ggplot2
First, let’s install and load the package:
The Grammar of Graphics
ggplot2 is based on the idea that you can build any plot from the same components:
- Data: The dataset you want to visualize
- Aesthetics: Mapping of variables to visual properties
- Geometries: Visual elements representing data points
- Facets: For creating small multiples
- Statistics: Statistical transformations of the data
- Coordinates: The coordinate system
- Themes: Controlling the visual style
Basic Plot Structure
Every ggplot2 plot starts with the `ggplot()` function and is built up by adding layers:
# Minimal example: a single layer of points, weight on x and mpg on y
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
Common Geometries (geoms)
Scatter Plots
# Scatter plot that additionally encodes cylinder count as colour and
# horsepower as point size, with explicit titles and legend names
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = factor(cyl), size = hp)
) +
  geom_point(alpha = 0.7) +  # semi-transparent so overlapping points show
  ggtitle(
    "Car Weight vs. Fuel Efficiency",
    subtitle = "Colored by cylinder count, sized by horsepower"
  ) +
  labs(
    x = "Weight (1000 lbs)",
    y = "Miles Per Gallon",
    color = "Cylinders",
    size = "Horsepower"
  )
Line Plots
# Build a small synthetic time series: 20 time steps whose values are the
# running sum of standard-normal draws. No seed is set, so the series
# differs on every run.
time_data <- data.frame(
  time = 1:20,
  value = cumsum(rnorm(20))
)

# Line plot with a marker at each observation.
# `linewidth` sets the line thickness (it replaces `size` for lines as of
# ggplot2 3.4.0); `size` still controls the point size.
ggplot(time_data, aes(x = time, y = value)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "steelblue", size = 2) +
  labs(title = "Time Series Plot", x = "Time", y = "Value")
Bar Charts
# Bar chart of row counts: only x is mapped, so geom_bar() tallies the
# number of cars in each cylinder group
ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar(fill = "steelblue") +
  ggtitle("Count of Cars by Cylinder") +
  xlab("Cylinders") +
  ylab("Count")
# Pre-compute the number of cars per cylinder count so the totals can be
# drawn as bars and also printed as text labels
cyl_summary <- as.data.frame(table(mtcars$cyl))
names(cyl_summary) <- c("cyl", "count")

# geom_col() uses the supplied counts as bar heights; vjust = -0.5 nudges
# each label so it sits just above its bar
ggplot(cyl_summary, aes(x = cyl, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = count), vjust = -0.5) +
  labs(title = "Count of Cars by Cylinder", x = "Cylinders", y = "Count")
Histograms and Density Plots
# Histogram of mpg split into 10 bins; white outlines separate the bars
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_histogram(bins = 10, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of Fuel Efficiency",
    x = "Miles Per Gallon",
    y = "Count"
  )
# Kernel density estimate of mpg drawn as a half-transparent filled curve
ggplot(data = mtcars, mapping = aes(x = mpg)) +
  geom_density(fill = "steelblue", alpha = 0.5) +
  labs(
    title = "Density of Fuel Efficiency",
    x = "Miles Per Gallon",
    y = "Density"
  )
# Histogram and density curve on one panel.
# The histogram's y is mapped to the computed density (instead of the
# count) so both layers share the same y scale. after_stat(density) is the
# current spelling of the deprecated ..density.. notation, and `linewidth`
# replaces `size` for line thickness (ggplot2 3.4.0).
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 10,
    fill = "lightblue",
    color = "white"
  ) +
  geom_density(color = "darkblue", linewidth = 1) +
  labs(
    title = "Distribution of Fuel Efficiency",
    x = "Miles Per Gallon",
    y = "Density"
  )
Box Plots
# One box per cylinder count summarising the spread of mpg
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Fuel Efficiency by Cylinder Count",
    x = "Cylinders",
    y = "Miles Per Gallon"
  )
# Box plot overlaid with the raw observations.
# outlier.shape = NA hides the box plot's own outlier markers, because the
# jittered layer already draws every point.
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(fill = "lightblue", outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  labs(
    title = "Fuel Efficiency by Cylinder Count",
    x = "Cylinders",
    y = "Miles Per Gallon"
  )
Customizing Aesthetics
You can map variables to various aesthetic properties:
# Map four variables besides x/y position onto one scatter plot:
# colour = cylinder count, shape = transmission (am), size = horsepower
ggplot(
  mtcars,
  aes(x = wt, y = mpg, color = factor(cyl), shape = factor(am), size = hp)
) +
  geom_point(alpha = 0.7) +
  labs(
    title = "Car Weight vs. Fuel Efficiency",
    x = "Weight (1000 lbs)",
    y = "Miles Per Gallon",
    color = "Cylinders",
    shape = "Transmission",
    size = "Horsepower"
  ) +
  # Labels are applied in factor-level order of am (0, then 1)
  scale_shape_discrete(labels = c("Automatic", "Manual"))
Faceting (Small Multiples)
Faceting creates separate plots for subsets of data:
# One panel per transmission type; the labeller swaps the raw am codes
# ("0"/"1") for readable strip titles
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = factor(cyl))
) +
  geom_point() +
  facet_wrap(
    ~am,
    labeller = labeller(am = c("0" = "Automatic", "1" = "Manual"))
  ) +
  labs(
    title = "Weight vs. MPG by Transmission Type",
    x = "Weight",
    y = "MPG",
    color = "Cylinders"
  )
# Grid of panels: rows split by engine shape (vs), columns by gear count.
# The labeller maps each raw code to a readable strip title.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  facet_grid(
    vs ~ gear,
    labeller = labeller(
      vs = c("0" = "V-Engine", "1" = "Straight Engine"),
      gear = c("3" = "3 Gears", "4" = "4 Gears", "5" = "5 Gears")
    )
  ) +
  labs(title = "Weight vs. MPG by Engine Type and Gear Count")
Adding Statistics
ggplot2 can add statistical summaries to plots:
# Scatter plot with a fitted linear-model line; se = TRUE draws the
# confidence band around it
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Weight vs. MPG with Linear Trend",
    x = "Weight",
    y = "MPG"
  )
# One smoothed trend per cylinder group (the grouping comes from the color
# mapping). No method is given, so ggplot2 picks its default smoother;
# se = FALSE turns the confidence bands off.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = factor(cyl))
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    title = "Weight vs. MPG by Cylinder",
    x = "Weight",
    y = "MPG",
    color = "Cylinders"
  )
Coordinate Systems
Change how the data is mapped to the plotting area:
# Horizontal box plot: coord_flip() swaps the x and y axes after the
# boxes have been computed
ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(fill = "lightblue") +
  coord_flip() +
  labs(
    title = "Fuel Efficiency by Cylinder Count",
    x = "Cylinders",
    y = "Miles Per Gallon"
  )
# Pie chart: a single stacked bar (x = "") whose y values are mapped to
# the angle by coord_polar(); theme_void() removes axes and background
ggplot(
  data = cyl_summary,
  mapping = aes(x = "", y = count, fill = cyl)
) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(title = "Cars by Cylinder Count", fill = "Cylinders") +
  theme_void()
Themes
Themes control the non-data elements of the plot:
# Base plot stored in `p` with the default theme, so that each built-in
# theme below can be applied to the same plot
p <- ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point() +
  labs(
    title = "Weight vs. MPG by Cylinder Count",
    x = "Weight",
    y = "MPG",
    color = "Cylinders"
  )

# Different built-in themes: each line renders `p` with one complete theme
p + theme_minimal()
p + theme_classic()
p + theme_dark()
p + theme_bw()
Custom Theme Elements
You can customize specific theme elements:
# Scatter plot whose non-data elements (title fonts, legend placement and
# background, panel background and grid lines) are overridden one by one
# through theme()
ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(size = 3) +
  labs(
    title = "Custom Themed Plot",
    subtitle = "Weight vs. MPG by Cylinder Count",
    x = "Weight (1000 lbs)",
    y = "Miles Per Gallon",
    color = "Cylinders"
  ) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12, color = "gray50"),
    axis.title = element_text(size = 12, face = "bold"),
    legend.position = "top",
    legend.background = element_rect(fill = "lightyellow", color = "gray"),
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_line(color = "gray95")
  )
Combining Multiple Plots
The `patchwork` package makes it easy to combine multiple ggplots:
# Combine three plots into one figure with patchwork.
# patchwork is optional, so it is attached only when installed; otherwise
# a message explains how to get it.
if (requireNamespace("patchwork", quietly = TRUE)) {
  library(patchwork)

  # Create three different plots
  p1 <- ggplot(mtcars, aes(x = wt, y = mpg)) +
    geom_point() +
    labs(title = "Weight vs. MPG")

  p2 <- ggplot(mtcars, aes(x = hp, y = mpg)) +
    geom_point(color = "red") +
    labs(title = "Horsepower vs. MPG")

  p3 <- ggplot(mtcars, aes(x = factor(cyl))) +
    geom_bar(fill = "steelblue") +
    labs(title = "Count by Cylinders")

  # Combine plots: `+` places them side by side and plot_layout() arranges
  # the result in two columns
  p1 + p2 + p3 + plot_layout(ncol = 2)
} else {
  message("The patchwork package is not installed. Install with: install.packages('patchwork')")
}
Saving ggplot2 Plots
# Build the plot object that the ggsave() examples below would write out
p <- ggplot(mtcars, aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(size = 3) +
  labs(
    title = "Weight vs. MPG",
    x = "Weight",
    y = "MPG",
    color = "Cylinders"
  ) +
  theme_minimal()

# Example of how to save (not run)
# ggsave("my_ggplot.png", plot = p, width = 8, height = 6, dpi = 300)
# ggsave("my_ggplot.pdf", plot = p, width = 8, height = 6)
ggplot2 offers a powerful and flexible system for creating visualizations in R. Its consistent syntax and layered approach make it possible to create both simple and complex plots with the same basic structure.