install.packages("DALEX") install.packages("rms") install.packages("randomForest") install.packages("gbm") install.packages("e1071") install.packages("ggplot2") install.packages("RCurl") install.packages("archivist") library("DALEX") # pakiet umożliwi analizę czarnych skrzynek library("rms") # pakiet umożliwi modelowanie zależności krzywoliniowych za pomocą regresji library("randomForest") # lasy losowe library("gbm") # gradient boosting library("e1071") # support vector machine (maszyna wektorów nośnych) library("ggplot2") # wykresy i wizualizacja danych library("RCurl") # ???? library("archivist") titanic<-archivist::aread("pbiecek/models/27e5c") #wyświetlenie danych z tabeli titanic View(titanic) #model regresji logistycznej: titanic_lmr <- rms::lrm(survived == "yes" ~ gender + rms::rcs(age) + class + sibsp + parch + fare + embarked, titanic) #model lasów losowych set.seed(1313) titanic_rf <- randomForest::randomForest(survived ~ class + gender + age + sibsp + parch + fare + embarked, data = titanic, na.action=na.roughfix) #model gradient boosting set.seed(1313) titanic_gbm <- gbm::gbm(survived == "yes" ~ class + gender + age + sibsp + parch + fare + embarked, data = titanic, n.trees = 15000, distribution = "bernoulli") #model maszyny wektorow nosnych: set.seed(1313) titanic_svm <- e1071::svm(survived == "yes" ~ class + gender + age + sibsp + parch + fare + embarked, data = titanic, type = "C-classification", probability = TRUE) #Zapakowanie modeli do podobnych opakowań (explainerów): titanic_lmr_exp <- explain(model = titanic_lmr, data = titanic[, -9], y = titanic$survived == "yes", label = "Logistic Regression", type = "classification") titanic_rf_exp <- explain(model = titanic_rf, data = titanic[, -9], y = titanic$survived == "yes", label = "Random Forest") titanic_gbm_exp <- explain(model = titanic_gbm, data = titanic[, -9], y = titanic$survived == "yes", label = "Generalized Boosted Regression") titanic_svm_exp <- explain(model = titanic_svm, data = titanic[, -9], y = titanic$survived == "yes", label = "Support Vector Machine") #porównywanie siły predykcyjnej modeli: model_performance(titanic_lmr_exp)$measures$auc model_performance(titanic_rf_exp)$measures$auc model_performance(titanic_gbm_exp)$measures$auc model_performance(titanic_svm_exp)$measures$auc #nasi pasazerowie jack <- data.frame( class = factor("3rd", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")), gender = factor("male", levels = c("female", "male")), age = 20, sibsp = 0, parch = 0, fare = 10, embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton")) ) rose <- data.frame( class = factor("1st", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")), gender = factor("female", levels = c("female", "male")), age = 17, sibsp = 0, parch = 1, fare = 850, embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton")) ) #twoja kolej #twoje_imie <- data.frame( #class = factor(, levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")), #gender = factor(, levels = c("female", "male")), #age = , #sibsp = , #parch = , #fare = , #embarked = factor(, levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton")) #) {print(jack) print(rose)} #str(archivist::aread("pbiecek/models/e3596")) #str(jack) #przewidywania modeli dla pasazerow {print(predict(titanic_lmr_exp, jack)) print(predict(titanic_rf_exp, jack)) print(predict(titanic_gbm_exp, jack)) print(predict(titanic_svm_exp, jack)) } {print(predict(titanic_lmr_exp, rose)) print(predict(titanic_rf_exp, rose)) print(predict(titanic_gbm_exp, rose)) print(predict(titanic_svm_exp, rose))} #co wpływa na wynik w danym modelu? #JACK #model regresji logistycznej bd_lmr <- predict_parts(explainer = titanic_lmr_exp, new_observation = jack, type = "break_down") plot(bd_lmr) #model lasow losowych bd_rf <- predict_parts(explainer = titanic_rf_exp, new_observation = jack, type = "break_down") plot(bd_rf) #model gradient boosting bd_gbm <- predict_parts(explainer = titanic_gbm_exp, new_observation = jack, type = "break_down") plot(bd_gbm) #model maszyny wektorów bd_svm <- predict_parts(explainer = titanic_svm_exp, new_observation = jack, type = "break_down") plot(bd_svm) #ROSE #model regresji logistycznej bd_lmr <- predict_parts(explainer = titanic_lmr_exp, new_observation = rose, type = "break_down") plot(bd_lmr) #model lasow losowych bd_rf <- predict_parts(explainer = titanic_rf_exp, new_observation = rose, type = "break_down") plot(bd_rf) #model gradient boosting bd_gbm <- predict_parts(explainer = titanic_gbm_exp, new_observation = rose, type = "break_down") plot(bd_gbm) #model maszyny wektorów bd_svm <- predict_parts(explainer = titanic_svm_exp, new_observation = rose, type = "break_down") plot(bd_svm) #jak wplywa wiek i cena biletu na szanse przezycia #w modelu gradient boosting? library("ggplot2") cp_titanic_gbm <- predict_profile(explainer = titanic_gbm_exp, new_observation = jack) plot(cp_titanic_gbm, variables = c("age", "fare")) + ggtitle("Ceteris-paribus profile", "") + ylim(0, 0.8) #jak wplywa klasa, miejsce z ktorego wyruszyl pasazer oraz plec #w modelu regresji logistycznej? cp_titanic_lmr <- predict_profile(explainer = titanic_lmr_exp, new_observation = jack) plot(cp_titanic_lmr, variables = c("class", "embarked", "gender"), variable_type = "categorical", categorical_type = "bars") + ggtitle("Ceteris-paribus profile", "") # od jakich zmiennych zależy predykcja? #w modelu regresji logistycznej vip_lm <- model_parts(explainer = titanic_lmr_exp, B = 50, N = NULL) plot(vip_lm) #w modelu lasów losowych vip_rf <- model_parts(explainer = titanic_rf_exp, B = 50, N = NULL) plot(vip_rf) #w modelu gradient boosting vip_gbm <- model_parts(explainer = titanic_gbm_exp, B = 50, N = NULL) plot(vip_gbm) #w modelu maszyny wektorów vip_svm <- model_parts(explainer = titanic_svm_exp, B = 50, N = NULL) plot(vip_svm) #W jaki sposób średnio przeżywalność zależy od wieku? plot(model_profile(explainer = titanic_rf_exp, variables = "age"), geom="profiles") #W jaki sposób średnio przeżywalność zależy od płci? plot(model_profile(explainer = titanic_rf_exp, variables = "age", groups = "gender"), geom="profiles")