{ "cells": [ { "cell_type": "markdown", "id": "89405a63", "metadata": {}, "source": [ "# Shapash in Jupyter - GLM Regression Overview\n", "\n", "This tutorial shows how to:\n", "- train a GLM regressor on House Prices data\n", "- evaluate predictions\n", "- use Shapash for global and local explainability\n", "- optionally launch the Shapash webapp" ] }, { "cell_type": "code", "execution_count": null, "id": "e578ffed", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "from category_encoders import one_hot\n", "from sklearn.linear_model import GammaRegressor\n", "from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "from shapash import SmartExplainer\n", "from shapash.data.data_loader import data_loading" ] }, { "cell_type": "markdown", "id": "8f62b4ae", "metadata": {}, "source": [ "## 1. Build a GLM regressor" ] }, { "cell_type": "code", "execution_count": 2, "id": "67c70817", "metadata": {}, "outputs": [ { "data": { "application/vnd.microsoft.datawrangler.viewer.v0+json": { "columns": [ { "name": "Id", "rawType": "int64", "type": "integer" }, { "name": "MSSubClass", "rawType": "object", "type": "string" }, { "name": "MSZoning", "rawType": "object", "type": "string" }, { "name": "LotArea", "rawType": "int64", "type": "integer" }, { "name": "Street", "rawType": "object", "type": "string" }, { "name": "LotShape", "rawType": "object", "type": "string" }, { "name": "LandContour", "rawType": "object", "type": "string" }, { "name": "Utilities", "rawType": "object", "type": "string" }, { "name": "LotConfig", "rawType": "object", "type": "string" }, { "name": "LandSlope", "rawType": "object", "type": "string" }, { "name": "Neighborhood", "rawType": "object", "type": "string" }, { "name": "Condition1", "rawType": "object", "type": "string" }, { "name": "Condition2", "rawType": "object", "type": "string" }, { "name": "BldgType", "rawType": "object", "type": "string" }, { "name": "HouseStyle", "rawType": "object", "type": "string" }, { "name": "OverallQual", "rawType": "int64", "type": "integer" }, { "name": "OverallCond", "rawType": "int64", "type": "integer" }, { "name": "YearBuilt", "rawType": "int64", "type": "integer" }, { "name": "YearRemodAdd", "rawType": "int64", "type": "integer" }, { "name": "RoofStyle", "rawType": "object", "type": "string" }, { "name": "RoofMatl", "rawType": "object", "type": "string" }, { "name": "Exterior1st", "rawType": "object", "type": "string" }, { "name": "Exterior2nd", "rawType": "object", "type": "string" }, { "name": "MasVnrType", "rawType": "object", "type": "unknown" }, { "name": "MasVnrArea", "rawType": "float64", "type": "float" }, { "name": "ExterQual", "rawType": "object", "type": "string" }, { "name": "ExterCond", "rawType": "object", "type": "string" }, { "name": "Foundation", "rawType": "object", "type": "string" }, { "name": "BsmtQual", "rawType": "object", "type": "string" }, { "name": "BsmtCond", "rawType": "object", "type": "string" }, { "name": "BsmtExposure", "rawType": "object", "type": "string" }, { "name": "BsmtFinType1", "rawType": "object", "type": "string" }, { "name": "BsmtFinSF1", "rawType": "int64", "type": "integer" }, { "name": "BsmtFinType2", "rawType": "object", "type": "string" }, { "name": "BsmtFinSF2", "rawType": "int64", "type": "integer" }, { "name": "BsmtUnfSF", "rawType": "int64", "type": "integer" }, { "name": "TotalBsmtSF", "rawType": "int64", "type": "integer" }, { "name": "Heating", "rawType": "object", "type": "string" }, { "name": "HeatingQC", "rawType": "object", "type": "string" }, { "name": "CentralAir", "rawType": "object", "type": "string" }, { "name": "Electrical", "rawType": "object", "type": "string" }, { "name": "1stFlrSF", "rawType": "int64", "type": "integer" }, { "name": "2ndFlrSF", "rawType": "int64", "type": "integer" }, { "name": "LowQualFinSF", "rawType": "int64", "type": "integer" }, { "name": "GrLivArea", "rawType": "int64", "type": "integer" }, { "name": "BsmtFullBath", "rawType": "int64", "type": "integer" }, { "name": "BsmtHalfBath", "rawType": "int64", "type": "integer" }, { "name": "FullBath", "rawType": "int64", "type": "integer" }, { "name": "HalfBath", "rawType": "int64", "type": "integer" }, { "name": "BedroomAbvGr", "rawType": "int64", "type": "integer" }, { "name": "KitchenAbvGr", "rawType": "int64", "type": "integer" }, { "name": "KitchenQual", "rawType": "object", "type": "string" }, { "name": "TotRmsAbvGrd", "rawType": "int64", "type": "integer" }, { "name": "Functional", "rawType": "object", "type": "string" }, { "name": "Fireplaces", "rawType": "int64", "type": "integer" }, { "name": "GarageType", "rawType": "object", "type": "string" }, { "name": "GarageYrBlt", "rawType": "float64", "type": "float" }, { "name": "GarageFinish", "rawType": "object", "type": "string" }, { "name": "GarageArea", "rawType": "int64", "type": "integer" }, { "name": "GarageQual", "rawType": "object", "type": "string" }, { "name": "GarageCond", "rawType": "object", "type": "string" }, { "name": "PavedDrive", "rawType": "object", "type": "string" }, { "name": "WoodDeckSF", "rawType": "int64", "type": "integer" }, { "name": "OpenPorchSF", "rawType": "int64", "type": "integer" }, { "name": "EnclosedPorch", "rawType": "int64", "type": "integer" }, { "name": "3SsnPorch", "rawType": "int64", "type": "integer" }, { "name": "ScreenPorch", "rawType": "int64", "type": "integer" }, { "name": "PoolArea", "rawType": "int64", "type": "integer" }, { "name": "MiscVal", "rawType": "int64", "type": "integer" }, { "name": "MoSold", "rawType": "int64", "type": "integer" }, { "name": "YrSold", "rawType": "int64", "type": "integer" }, { "name": "SaleType", "rawType": "object", "type": "string" }, { "name": "SaleCondition", "rawType": "object", "type": "string" }, { "name": "SalePrice", "rawType": "int64", "type": "integer" } ], "ref": "1494fad0-b3b5-436b-a0d4-b49c6bde3425", "rows": [ [ "1", "2-Story 1946 & Newer", "Residential Low Density", "8450", "Paved", "Regular", "Near Flat/Level", "All public Utilities (E,G,W,& S)", "Inside lot", "Gentle slope", "College Creek", "Normal", "Normal", "Single-family Detached", "Two story", "7", "5", "2003", "2003", "Gable", "Standard (Composite) Shingle", "Vinyl Siding", "Vinyl Siding", "Brick Face", "196.0", "Good", "Average/Typical", "Poured Contrete", "Good (90-99 inches)", "Typical - slight dampness allowed", "No Exposure/No Basement", "Good Living Quarters", "706", "Unfinished/No Basement", "0", "150", "856", "Gas forced warm air furnace", "Excellent", "Yes", "Standard Circuit Breakers & Romex", "856", "854", "0", "1710", "1", "0", "2", "1", "3", "1", "Good", "8", "Typical Functionality", "0", "Attached to home", "2003.0", "Rough Finished", "548", "Typical/Average", "Typical/Average", "Paved", "0", "61", "0", "0", "0", "0", "0", "2", "2008", "Warranty Deed - Conventional", "Normal Sale", "208500" ], [ "2", "1-Story 1946 & Newer All Styles", "Residential Low Density", "9600", "Paved", "Regular", "Near Flat/Level", "All public Utilities (E,G,W,& S)", "Frontage on 2 sides of property", "Gentle slope", "Veenker", "Adjacent to feeder street", "Normal", "Single-family Detached", "One story", "6", "8", "1976", "1976", "Gable", "Standard (Composite) Shingle", "Metal Siding", "Metal Siding", null, "0.0", "Average/Typical", "Average/Typical", "Cinder Block", "Good (90-99 inches)", "Typical - slight dampness allowed", "Good Exposure", "Average Living Quarters", "978", "Unfinished/No Basement", "0", "284", "1262", "Gas forced warm air furnace", "Excellent", "Yes", "Standard Circuit Breakers & Romex", "1262", "0", "0", "1262", "0", "1", "2", "0", "3", "1", "Typical/Average", "6", "Typical Functionality", "1", "Attached to home", "1976.0", "Rough Finished", "460", "Typical/Average", "Typical/Average", "Paved", "298", "0", "0", "0", "0", "0", "0", "5", "2007", "Warranty Deed - Conventional", "Normal Sale", "181500" ], [ "3", "2-Story 1946 & Newer", "Residential Low Density", "11250", "Paved", "Slightly irregular", "Near Flat/Level", "All public Utilities (E,G,W,& S)", "Inside lot", "Gentle slope", "College Creek", "Normal", "Normal", "Single-family Detached", "Two story", "7", "5", "2001", "2002", "Gable", "Standard (Composite) Shingle", "Vinyl Siding", "Vinyl Siding", "Brick Face", "162.0", "Good", "Average/Typical", "Poured Contrete", "Good (90-99 inches)", "Typical - slight dampness allowed", "Mimimum Exposure", "Good Living Quarters", "486", "Unfinished/No Basement", "0", "434", "920", "Gas forced warm air furnace", "Excellent", "Yes", "Standard Circuit Breakers & Romex", "920", "866", "0", "1786", "1", "0", "2", "1", "3", "1", "Good", "6", "Typical Functionality", "1", "Attached to home", "2001.0", "Rough Finished", "608", "Typical/Average", "Typical/Average", "Paved", "0", "42", "0", "0", "0", "0", "0", "9", "2008", "Warranty Deed - Conventional", "Normal Sale", "223500" ], [ "4", "2-Story 1945 & Older", "Residential Low Density", "9550", "Paved", "Slightly irregular", "Near Flat/Level", "All public Utilities (E,G,W,& S)", "Corner lot", "Gentle slope", "Crawford", "Normal", "Normal", "Single-family Detached", "Two story", "7", "5", "1915", "1970", "Gable", "Standard (Composite) Shingle", "Wood Siding", "Wood Shingles", null, "0.0", "Average/Typical", "Average/Typical", "Brick & Tile", "Typical (80-89 inches)", "Good", "No Exposure/No Basement", "Average Living Quarters", "216", "Unfinished/No Basement", "0", "540", "756", "Gas forced warm air furnace", "Good", "Yes", "Standard Circuit Breakers & Romex", "961", "756", "0", "1717", "1", "0", "1", "0", "3", "1", "Good", "7", "Typical Functionality", "1", "Detached from home", "1998.0", "Unfinished/No Garage", "642", "Typical/Average", "Typical/Average", "Paved", "0", "35", "272", "0", "0", "0", "0", "2", "2006", "Warranty Deed - Conventional", "Abnormal Sale", "140000" ], [ "5", "2-Story 1946 & Newer", "Residential Low Density", "14260", "Paved", "Slightly irregular", "Near Flat/Level", "All public Utilities (E,G,W,& S)", "Frontage on 2 sides of property", "Gentle slope", "Northridge", "Normal", "Normal", "Single-family Detached", "Two story", "8", "5", "2000", "2000", "Gable", "Standard (Composite) Shingle", "Vinyl Siding", "Vinyl Siding", "Brick Face", "350.0", "Good", "Average/Typical", "Poured Contrete", "Good (90-99 inches)", "Typical - slight dampness allowed", "Average Exposure", "Good Living Quarters", "655", "Unfinished/No Basement", "0", "490", "1145", "Gas forced warm air furnace", "Excellent", "Yes", "Standard Circuit Breakers & Romex", "1145", "1053", "0", "2198", "1", "0", "2", "1", "4", "1", "Good", "9", "Typical Functionality", "1", "Attached to home", "2000.0", "Rough Finished", "836", "Typical/Average", "Typical/Average", "Paved", "192", "84", "0", "0", "0", "0", "0", "12", "2008", "Warranty Deed - Conventional", "Normal Sale", "250000" ] ], "shape": { "columns": 73, "rows": 5 } }, "text/html": [ "
| \n", " | MSSubClass | \n", "MSZoning | \n", "LotArea | \n", "Street | \n", "LotShape | \n", "LandContour | \n", "Utilities | \n", "LotConfig | \n", "LandSlope | \n", "Neighborhood | \n", "... | \n", "EnclosedPorch | \n", "3SsnPorch | \n", "ScreenPorch | \n", "PoolArea | \n", "MiscVal | \n", "MoSold | \n", "YrSold | \n", "SaleType | \n", "SaleCondition | \n", "SalePrice | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1 | \n", "2-Story 1946 & Newer | \n", "Residential Low Density | \n", "8450 | \n", "Paved | \n", "Regular | \n", "Near Flat/Level | \n", "All public Utilities (E,G,W,& S) | \n", "Inside lot | \n", "Gentle slope | \n", "College Creek | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "2008 | \n", "Warranty Deed - Conventional | \n", "Normal Sale | \n", "208500 | \n", "
| 2 | \n", "1-Story 1946 & Newer All Styles | \n", "Residential Low Density | \n", "9600 | \n", "Paved | \n", "Regular | \n", "Near Flat/Level | \n", "All public Utilities (E,G,W,& S) | \n", "Frontage on 2 sides of property | \n", "Gentle slope | \n", "Veenker | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "5 | \n", "2007 | \n", "Warranty Deed - Conventional | \n", "Normal Sale | \n", "181500 | \n", "
| 3 | \n", "2-Story 1946 & Newer | \n", "Residential Low Density | \n", "11250 | \n", "Paved | \n", "Slightly irregular | \n", "Near Flat/Level | \n", "All public Utilities (E,G,W,& S) | \n", "Inside lot | \n", "Gentle slope | \n", "College Creek | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "9 | \n", "2008 | \n", "Warranty Deed - Conventional | \n", "Normal Sale | \n", "223500 | \n", "
| 4 | \n", "2-Story 1945 & Older | \n", "Residential Low Density | \n", "9550 | \n", "Paved | \n", "Slightly irregular | \n", "Near Flat/Level | \n", "All public Utilities (E,G,W,& S) | \n", "Corner lot | \n", "Gentle slope | \n", "Crawford | \n", "... | \n", "272 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "2006 | \n", "Warranty Deed - Conventional | \n", "Abnormal Sale | \n", "140000 | \n", "
| 5 | \n", "2-Story 1946 & Newer | \n", "Residential Low Density | \n", "14260 | \n", "Paved | \n", "Slightly irregular | \n", "Near Flat/Level | \n", "All public Utilities (E,G,W,& S) | \n", "Frontage on 2 sides of property | \n", "Gentle slope | \n", "Northridge | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "12 | \n", "2008 | \n", "Warranty Deed - Conventional | \n", "Normal Sale | \n", "250000 | \n", "
5 rows × 73 columns
\n", "| \n", " | pred | \n", "feature_1 | \n", "value_1 | \n", "contribution_1 | \n", "feature_2 | \n", "value_2 | \n", "contribution_2 | \n", "feature_3 | \n", "value_3 | \n", "contribution_3 | \n", "... | \n", "contribution_5 | \n", "feature_6 | \n", "value_6 | \n", "contribution_6 | \n", "feature_7 | \n", "value_7 | \n", "contribution_7 | \n", "feature_8 | \n", "value_8 | \n", "contribution_8 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 259 | \n", "215231.510661 | \n", "Second floor square feet | \n", "829 | \n", "7212.75529 | \n", "Original construction date | \n", "2001 | \n", "6387.987536 | \n", "Three season porch area in square feet | \n", "245 | \n", "6019.076585 | \n", "... | \n", "4495.137907 | \n", "Half baths above grade | \n", "1 | \n", "4058.735982 | \n", "Exterior covering on house | \n", "Vinyl Siding | \n", "3440.485441 | \n", "Ground living area square feet | \n", "1792 | \n", "3248.712227 | \n", "
| 268 | \n", "160397.678094 | \n", "Building Class | \n", "2-1/2 Story All Ages | \n", "-21612.884252 | \n", "Overall condition of the house | \n", "8 | \n", "12250.944277 | \n", "Ground living area square feet | \n", "2192 | \n", "10575.561484 | \n", "... | \n", "-9545.699803 | \n", "Low quality finished square feet | \n", "420 | \n", "9028.738252 | \n", "Original construction date | \n", "1939 | \n", "-7836.722046 | \n", "Rating of basement finished area | \n", "Low Quality | \n", "-5422.044481 | \n", "
| 289 | \n", "115701.508983 | \n", "Ground living area square feet | \n", "900 | \n", "-12626.35656 | \n", "Overall material and finish of the house | \n", "5 | \n", "-8356.89129 | \n", "Size of garage in square feet | \n", "280 | \n", "-7095.89495 | \n", "... | \n", "-4334.867376 | \n", "Full bathrooms above grade | \n", "1 | \n", "-3159.065666 | \n", "Physical locations within Ames city limits | \n", "Sawyer | \n", "-3075.744804 | \n", "Number of fireplaces | \n", "0 | \n", "-3056.889288 | \n", "
| 650 | \n", "80205.258207 | \n", "Physical locations within Ames city limits | \n", "Meadow Village | \n", "-17473.852842 | \n", "Ground living area square feet | \n", "630 | \n", "-14787.896977 | \n", "Size of garage in square feet | \n", "0 | \n", "-14247.389586 | \n", "... | \n", "-11790.654848 | \n", "Exterior covering on house | \n", "Cement Board | \n", "-10484.672846 | \n", "Style of dwelling | \n", "Split Foyer | \n", "-7235.055635 | \n", "First Floor square feet | \n", "630 | \n", "-6718.703928 | \n", "
| 1234 | \n", "130344.536158 | \n", "Condition of sale | \n", "Abnormal Sale | \n", "-9608.452839 | \n", "Overall material and finish of the house | \n", "5 | \n", "-8885.747137 | \n", "Ground living area square feet | \n", "1188 | \n", "-8100.302992 | \n", "... | \n", "-5756.506194 | \n", "Type 1 finished square feet | \n", "1000 | \n", "4126.536304 | \n", "Heating quality and condition | \n", "Fair | \n", "-4000.527692 | \n", "Full bathrooms above grade | \n", "1 | \n", "-3461.324322 | \n", "
| 168 | \n", "352193.512148 | \n", "Type of sale | \n", "Home just constructed and sold | \n", "27423.64341 | \n", "Physical locations within Ames city limits | \n", "Northridge Heights | \n", "26264.829848 | \n", "Overall material and finish of the house | \n", "8 | \n", "15713.549155 | \n", "... | \n", "15303.678926 | \n", "Kitchen quality | \n", "Excellent | \n", "13895.520143 | \n", "Condition of sale | \n", "Home wasn't completed | \n", "-13460.690594 | \n", "Type 1 finished square feet | \n", "1288 | \n", "10685.074459 | \n", "
| 927 | \n", "295017.319098 | \n", "Physical locations within Ames city limits | \n", "Northridge Heights | \n", "23176.411653 | \n", "Ground living area square feet | \n", "2374 | \n", "19431.401488 | \n", "Second floor square feet | \n", "1140 | \n", "15255.715262 | \n", "... | \n", "12034.304356 | \n", "Original construction date | \n", "2003 | \n", "8068.794323 | \n", "Size of garage in square feet | \n", "656 | \n", "7847.434879 | \n", "Type 1 finished square feet | \n", "0 | \n", "-5794.106906 | \n", "
| 832 | \n", "154075.134016 | \n", "Building Class | \n", "2-Story PUD-1946 & Newer | \n", "-13398.22308 | \n", "First Floor square feet | \n", "520 | \n", "-11003.949501 | \n", "Ground living area square feet | \n", "1200 | \n", "-8430.064484 | \n", "... | \n", "6333.763524 | \n", "Total square feet of basement area | \n", "600 | \n", "-4856.809903 | \n", "General zoning classification | \n", "Floating Village Residential | \n", "4332.979451 | \n", "Type 1 finished square feet | \n", "0 | \n", "-4234.924633 | \n", "
| 1238 | \n", "202185.042361 | \n", "Second floor square feet | \n", "1101 | \n", "11612.437376 | \n", "Original construction date | \n", "2004 | \n", "6908.342505 | \n", "First Floor square feet | \n", "847 | \n", "-6712.185727 | \n", "... | \n", "-4905.248373 | \n", "Overall material and finish of the house | \n", "7 | \n", "4247.437853 | \n", "Half baths above grade | \n", "1 | \n", "3896.85019 | \n", "Exterior covering on house | \n", "Vinyl Siding | \n", "3281.530173 | \n", "
| 427 | \n", "245162.694436 | \n", "Screen porch area in square feet | \n", "396 | \n", "16317.597159 | \n", "Type 1 finished square feet | \n", "1518 | \n", "11626.876519 | \n", "Refers to walkout or garden level walls | \n", "Good Exposure | \n", "10301.282465 | \n", "... | \n", "-9292.625571 | \n", "First Floor square feet | \n", "1644 | \n", "8228.216186 | \n", "Second floor square feet | \n", "0 | \n", "-7922.166877 | \n", "Building Class | \n", "Split or Multi-Level | \n", "-7435.296436 | \n", "
10 rows × 25 columns
\n", "