3D plots in R

Three-dimensional graphs are useful for visualizing linear regression. Below are two examples of graphs that I prepared to be displayed during lectures in a statistics course.

The first example is a visualization of multiple regression. When we have two quantitative explanatory variables, the regression model is not a line, but a plane in a three-dimensional space:

library(plotly)
library(MASS)
set.seed(123)
# Simulated data drawn from a trivariate normal distribution with zero
# means, unit variances and the correlations given in cov_matrix.
cov_matrix <- matrix(
  c(1.0, 0.7, 0.6,
    0.7, 1.0, 0.6,
    0.6, 0.6, 1.0),
  nrow = 3, byrow = TRUE
)
# 50 draws, rounded to two decimal places.
sim <- round(MASS::mvrnorm(50, mu = c(0, 0, 0), Sigma = cov_matrix), 2)
# The first two columns serve as explanatory variables, the third as y.
df <- data.frame(x1 = sim[, 1], x2 = sim[, 2], y = sim[, 3])

# Base figure: the simulated observations as 3D markers. Hovering over a
# point shows its x1, x2 and y values.
fig <- plot_ly()
fig <- add_trace(
  fig,
  data = df, x = ~ x1, y = ~ x2, z = ~ y,
  type = "scatter3d", mode = "markers",
  hoverinfo = 'text',
  text = ~ paste0("x1:  ", x1, "<br>x2:  ", x2, "<br>  y:  ", y)
)
# All three axes share the same fixed range.
axis_spec <- list(range = c(-2.5, 2.5))
fig <- layout(
  fig,
  scene = list(xaxis = axis_spec, yaxis = axis_spec, zaxis = axis_spec)
)
  # regression model: y explained by x1 and x2
lm_model <- lm(y ~ x1 + x2, data = df)
  # fitted values, stored next to the observations
df$yhat <- fitted(lm_model)
  # grid of points spanning the plotted range - used for the regression plane
x1_grid <- seq(from = -2.5, to = 2.5, length.out = 6)
x2_grid <- seq(from = -2.5, to = 2.5, length.out = 6)
  # every (x1, x2) combination with the height of the plane at that point
grid_df <- expand.grid(x1 = x1_grid, x2 = x2_grid)
grid_df$predicted_y <- predict(lm_model, newdata = grid_df)

# Grid lines on the regression plane that run parallel to the x2 axis:
# one line per value of x1_grid, drawn from x2 = -3 to x2 = 3.
# The plane heights at both ends are predicted once for all lines instead
# of calling predict() twice in every iteration.
z_at_x2_min <- unname(predict(
  lm_model,
  newdata = data.frame(x1 = x1_grid, x2 = rep(-3, length(x1_grid)))
))
z_at_x2_max <- unname(predict(
  lm_model,
  newdata = data.frame(x1 = x1_grid, x2 = rep(3, length(x1_grid)))
))

# seq_along() also behaves correctly for an empty grid, where
# 1:length(x1_grid) would iterate over c(1, 0).
for (i in seq_along(x1_grid)) {
  fig <- fig %>% add_trace(
    x = c(x1_grid[i], x1_grid[i]),
    y = c(-3, 3),
    z = c(z_at_x2_min[i], z_at_x2_max[i]),
    type = "scatter3d", mode = "lines", hoverinfo = 'none',
    # NOTE(review): the plotly reference does not appear to list `opacity`
    # among the scatter3d `line` attributes, so it may be ignored here -
    # confirm, and set it at trace level if transparency is wanted.
    line = list(
      width = 1, color = "black", opacity = 0.3
    )
  )
}

# Grid lines on the regression plane that run parallel to the x1 axis:
# one line per value of x2_grid, drawn from x1 = -3 to x1 = 3.
# The plane heights at both ends are predicted once for all lines instead
# of calling predict() twice in every iteration.
z_at_x1_min <- unname(predict(
  lm_model,
  newdata = data.frame(x1 = rep(-3, length(x2_grid)), x2 = x2_grid)
))
z_at_x1_max <- unname(predict(
  lm_model,
  newdata = data.frame(x1 = rep(3, length(x2_grid)), x2 = x2_grid)
))

# seq_along() also behaves correctly for an empty grid, where
# 1:length(x2_grid) would iterate over c(1, 0).
for (i in seq_along(x2_grid)) {
  fig <- fig %>% add_trace(
    x = c(-3, 3),
    y = c(x2_grid[i], x2_grid[i]),
    z = c(z_at_x1_min[i], z_at_x1_max[i]),
    type = "scatter3d", mode = "lines", hoverinfo = 'none',
    # NOTE(review): the plotly reference does not appear to list `opacity`
    # among the scatter3d `line` attributes, so it may be ignored here -
    # confirm, and set it at trace level if transparency is wanted.
    line = list(
      width = 1, color = "black", opacity = 0.3
    )
  )
}

  # regression plane - drawn as a half-transparent mesh surface
  # (the legend is switched off for the whole figure)
fig <- layout(fig, showlegend = FALSE)
fig <- add_trace(
  fig,
  data = grid_df,
  x = ~ x1, y = ~ x2, z = ~ predicted_y,
  type = "mesh3d",
  opacity = 0.5,
  hoverinfo = 'none'
)
  # fitted values - red markers; hovering shows x1, x2 and the fitted
  # value rounded to four decimal places
fig <- add_trace(
  fig,
  data = df,
  x = ~ x1, y = ~ x2, z = ~ yhat,
  type = "scatter3d", mode = "markers",
  marker = list(size = 5, color = 'red'),
  opacity = .6,
  hoverinfo = 'text',
  text = ~ paste0("x1:  ", x1, "<br>x2:  ", x2, "<br>  yhat:  ", round(yhat, 4))
)

  # residuals - one red segment per observation, joining the observed y
  # with its fitted value at the same (x1, x2) position
  # seq_along() also behaves correctly for zero observations, where
  # 1:length(df$yhat) would iterate over c(1, 0).
for (i in seq_along(df$yhat)) {
  fig <- fig %>% add_trace(
    x = c(df$x1[i], df$x1[i]),
    y = c(df$x2[i], df$x2[i]),
    z = c(df$y[i], df$yhat[i]),
    type = "scatter3d",
    mode = "lines",
    hoverinfo = 'none',
    line = list(width = 5, color = "red")
  )
}
# Display the finished figure.
fig

The second example is a chart adapted from a similar chart in the book by Westfall and Arias entitled “Understanding Regression Analysis: A Conditional Distribution Approach.” This chart helps to look at regression as a model that returns a conditional distribution of the response variable for each value of the explanatory variable:

library(plotly)
# Simulation settings: sample size, error standard deviation and the
# coefficients of the true regression line.
n <- 180
sigma <- 4
intercept <- 50
slope <- 2

# Simulated sample. x is drawn before the noise term of y, so the random
# numbers are consumed in a fixed order after set.seed().
set.seed(123)
x <- rnorm(n, mean = 10, sd = 5)
y <- intercept + slope * x + rnorm(n, mean = 0, sd = sigma)
# plot(x, y)

# Observations, all placed in the z = 0 plane.
df <- data.frame(x = x, y = y, z = numeric(n))

# x values for the regression line and y values for the density curves.
xline <- seq(0, 20, by = 1)
yline <- seq(40, 100, by = .1)

# The true regression line, also in the z = 0 plane.
regline <- data.frame(x = xline, y = intercept + slope * xline, z = 0)

# Normal density of y along yline, with mean on the regression line at x0
# and standard deviation sigma; x is held constant at x0.
conditional_density <- function(x0) {
  data.frame(
    x = rep(x0, length(yline)),
    y = yline,
    z = dnorm(yline, mean = intercept + slope * x0, sd = sigma)
  )
}
dfline1 <- conditional_density(5)
dfline2 <- conditional_density(10)
dfline3 <- conditional_density(15)

# Build the figure step by step: the observations, the regression line and
# the three conditional density curves.
fig <- plot_ly()

# Observations as small black markers.
fig <- add_trace(
  fig,
  data = df, x = ~x, y = ~y, z = ~z,
  type = "scatter3d", mode = "markers",
  marker = list(size = 2, color = "black")
)

# Regression line in dark green.
fig <- add_trace(
  fig,
  data = regline, x = ~x, y = ~y, z = ~z,
  type = "scatter3d", mode = "lines",
  line = list(color = "darkgreen", width = 2)
)

# Density curves, added in order, each with its own shade of blue.
density_curves <- list(dfline1, dfline2, dfline3)
density_colors <- c("skyblue", "blue", "darkblue")
for (k in seq_along(density_curves)) {
  fig <- add_trace(
    fig,
    data = density_curves[[k]], x = ~x, y = ~y, z = ~z,
    type = "scatter3d", mode = "lines",
    line = list(color = density_colors[k], width = 2)
  )
}

fig <- layout(
  fig,
  scene = list(
    # Axis titles are in Polish: "hours of study" and "exam score".
    xaxis = list(title = "Liczba godzin nauki", range = c(0, 20)),
    yaxis = list(title = "Wynik egzaminu", range = c(40, 100)),
    # The z axis is hidden; its upper limit is 1.2 times the peak of a
    # normal density with standard deviation sigma.
    zaxis = list(
      title = "",
      range = c(0, dnorm(0, 0, sigma) * 1.2),
      showgrid = FALSE,
      visible = FALSE
    ),
    # Adjust these values to change the camera view
    camera = list(eye = list(x = -1, y = -2, z = .5))
  ),
  showlegend = FALSE
)
# Display the finished figure.
fig

For posts on R from other bloggers, see R-bloggers.

Błażej Kochański
Błażej Kochański
Banking Risk Expert, Researcher and Management Consultant