3D plots in R
Three-dimensional graphs are useful for visualizing linear regression. Below are two examples of graphs that I prepared to be displayed during lectures in a statistics course.
The first example is a visualization of multiple regression. When we have two quantitative explanatory variables, the regression model is not a line, but a plane in a three-dimensional space:
library(plotly)
library(MASS)
# Simulated data drawn from a trivariate normal distribution.
# Fixed seed so the figure is reproducible.
set.seed(123)
n_obs <- 50
# Unit variances; the off-diagonal entries are the covariances between
# the three simulated variables.
cov_mat <- matrix(
  c(
    1.0, 0.7, 0.6,
    0.7, 1.0, 0.6,
    0.6, 0.6, 1.0
  ),
  nrow = 3
)
# Values are rounded to two decimals.
sim <- round(
  MASS::mvrnorm(n_obs, mu = rep(0, 3), Sigma = cov_mat),
  digits = 2
)
# Columns 1 and 2 act as explanatory variables, column 3 as the response.
df <- data.frame(
  x1 = sim[, 1],
  x2 = sim[, 2],
  y = sim[, 3]
)
# Base figure: 3D scatter of the observed points, with a hover label that
# lists x1, x2 and y for each point.
fig <- plot_ly()
fig <- add_trace(
  fig,
  data = df,
  x = ~x1,
  y = ~x2,
  z = ~y,
  type = "scatter3d",
  mode = "markers",
  hoverinfo = "text",
  text = ~ paste0("x1: ", x1, "<br>x2: ", x2, "<br> y: ", y)
)
# All three axes share the same fixed range.
axis_limits <- list(range = c(-2.5, 2.5))
fig <- layout(
  fig,
  scene = list(
    xaxis = axis_limits,
    yaxis = axis_limits,
    zaxis = axis_limits
  )
)
# Regression model: y explained by x1 and x2.
lm_model <- lm(y ~ x1 + x2, data = df)

# Fitted values, stored next to the observations.
df[["yhat"]] <- lm_model[["fitted.values"]]

# Grid of points used to draw the regression plane: 6 x 6 evenly spaced
# values over [-2.5, 2.5] in both explanatory variables.
grid_axis <- seq(from = -2.5, to = 2.5, length.out = 6)
x1_grid <- grid_axis
x2_grid <- grid_axis
grid_df <- expand.grid(x1 = x1_grid, x2 = x2_grid)
grid_df[["predicted_y"]] <- predict(lm_model, newdata = grid_df)
# Grid lines lying on the regression plane, one per x1 grid value.
# Each line holds x1 fixed and runs from x2 = -3 to x2 = 3; its height at
# both ends is the model prediction. Note the ends (+/-3) lie slightly
# beyond the +/-2.5 range used for the grid itself.
# Iterating over the values directly (instead of 1:length(...)) is safe
# for an empty grid.
for (x1_val in x1_grid) {
  # Both end points are predicted in a single call.
  line_ends <- data.frame(x1 = x1_val, x2 = c(-3, 3))
  fig <- fig %>% add_trace(
    x = line_ends$x1,
    y = line_ends$x2,
    z = unname(predict(lm_model, newdata = line_ends)),
    type = "scatter3d", mode = "lines", hoverinfo = "none",
    # Opacity is a trace-level attribute; it was previously placed inside
    # `line`, where scatter3d does not define it.
    opacity = 0.3,
    line = list(width = 1, color = "black")
  )
}
# Grid lines lying on the regression plane, one per x2 grid value.
# Each line holds x2 fixed and runs from x1 = -3 to x1 = 3; its height at
# both ends is the model prediction. Note the ends (+/-3) lie slightly
# beyond the +/-2.5 range used for the grid itself.
# Iterating over the values directly (instead of 1:length(...)) is safe
# for an empty grid.
for (x2_val in x2_grid) {
  # Both end points are predicted in a single call.
  line_ends <- data.frame(x1 = c(-3, 3), x2 = x2_val)
  fig <- fig %>% add_trace(
    x = line_ends$x1,
    y = line_ends$x2,
    z = unname(predict(lm_model, newdata = line_ends)),
    type = "scatter3d", mode = "lines", hoverinfo = "none",
    # Opacity is a trace-level attribute; it was previously placed inside
    # `line`, where scatter3d does not define it.
    opacity = 0.3,
    line = list(width = 1, color = "black")
  )
}
# Regression plane drawn as a semi-transparent surface through the
# predicted values on the grid. The legend is switched off for the figure.
fig <- layout(fig, showlegend = FALSE)
fig <- add_trace(
  fig,
  data = grid_df,
  x = ~x1,
  y = ~x2,
  z = ~predicted_y,
  type = "mesh3d",
  opacity = 0.5,
  hoverinfo = "none"
)
# Fitted values: red markers placed at the model prediction for every
# observation, with a hover label showing x1, x2 and the rounded yhat.
fitted_marker <- list(color = "red", size = 5)
fig <- add_trace(
  fig,
  data = df,
  x = ~x1,
  y = ~x2,
  z = ~yhat,
  type = "scatter3d",
  mode = "markers",
  opacity = 0.6,
  marker = fitted_marker,
  hoverinfo = "text",
  text = ~ paste0("x1: ", x1, "<br>x2: ", x2, "<br> yhat: ", round(yhat, 4))
)
# Residuals: a vertical red segment from each observed y to its fitted
# value yhat, at the observation's (x1, x2) position.
# All segments are drawn as ONE line trace instead of one trace per
# observation: the coordinates are interleaved as (start, end, NA), and
# the NA after every pair breaks the line so consecutive segments are not
# joined.
resid_x <- c(rbind(df$x1, df$x1, NA))
resid_y <- c(rbind(df$x2, df$x2, NA))
resid_z <- c(rbind(df$y, df$yhat, NA))
fig <- fig %>% add_trace(
  x = resid_x,
  y = resid_y,
  z = resid_z,
  type = "scatter3d",
  mode = "lines",
  hoverinfo = "none",
  connectgaps = FALSE, # keep the NA gaps between segments
  line = list(width = 5, color = "red")
)

# Display the finished figure.
fig
The second example is a chart adapted from a similar one in the book by Westfall and Arias, “Understanding Regression Analysis: A Conditional Distribution Approach.” This chart helps to look at regression as a model that returns a conditional distribution of the response variable for each value of the explanatory variable:
library(plotly)
# ---- Simulated data: a linear model with normally distributed noise ----
n <- 180
sigma <- 4 # standard deviation of the noise around the regression line
set.seed(123)
x <- rnorm(n, mean = 10, sd = 5)
intercept <- 50
slope <- 2
y <- intercept + slope * x + rnorm(n, mean = 0, sd = sigma)
# Quick 2D check, left disabled: plot(x, y)

# Observations are placed on the floor of the 3D scene (z = 0).
df <- data.frame(x = x, y = y, z = numeric(n))

# Support points for the regression line (x) and the density curves (y).
xline <- seq(from = 0, to = 20, by = 1)
yline <- seq(from = 40, to = 100, by = 0.1)

# The true regression line, also drawn at z = 0.
regline <- data.frame(x = xline, y = intercept + slope * xline, z = 0)

# Normal density of y at a fixed x0, centred on the regression line at x0
# with standard deviation sigma; the density value is used as the height z.
conditional_density <- function(x0) {
  data.frame(
    x = rep(x0, length(yline)),
    y = yline,
    z = dnorm(yline, mean = intercept + slope * x0, sd = sigma)
  )
}
dfline1 <- conditional_density(5)
dfline2 <- conditional_density(10)
dfline3 <- conditional_density(15)
# Observations as small black markers.
fig <- plot_ly()
fig <- add_trace(
  fig,
  data = df, x = ~x, y = ~y, z = ~z,
  type = "scatter3d", mode = "markers",
  marker = list(size = 2, color = "black")
)

# Regression line.
fig <- add_trace(
  fig,
  data = regline, x = ~x, y = ~y, z = ~z,
  type = "scatter3d", mode = "lines",
  line = list(color = "darkgreen", width = 2)
)

# The three density curves, each in its own shade of blue.
density_curves <- list(dfline1, dfline2, dfline3)
density_colors <- c("skyblue", "blue", "darkblue")
for (k in seq_along(density_curves)) {
  fig <- add_trace(
    fig,
    data = density_curves[[k]], x = ~x, y = ~y, z = ~z,
    type = "scatter3d", mode = "lines",
    line = list(color = density_colors[k], width = 2)
  )
}

# Scene settings: titled x and y axes, a hidden z axis whose range leaves
# 20% headroom above the peak of the density, and a fixed camera position.
density_peak <- dnorm(0, 0, sigma)
scene_settings <- list(
  xaxis = list(title = "Liczba godzin nauki", range = c(0, 20)),
  yaxis = list(title = "Wynik egzaminu", range = c(40, 100)),
  zaxis = list(
    title = "",
    range = c(0, density_peak * 1.2),
    showgrid = FALSE,
    visible = FALSE
  ),
  camera = list(
    eye = list(x = -1, y = -2, z = 0.5) # adjust to change the camera view
  )
)
fig <- layout(fig, scene = scene_settings, showlegend = FALSE)

# Display the finished figure.
fig
For posts on R from other bloggers, see R-bloggers.