diff --git a/numpy_les2/dz_analitic3.ipynb b/numpy_les2/dz_analitic3.ipynb index 45b98dd..cc2f390 100644 --- a/numpy_les2/dz_analitic3.ipynb +++ b/numpy_les2/dz_analitic3.ipynb @@ -1 +1,4980 @@ -{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"tTr7YqcVvptR"},"outputs":[],"source":["#1. Импортируйте библиотеки pandas и numpy.\n","import numpy as np\n","import pandas as pd"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uvJgmxd9vptr","executionInfo":{"status":"ok","timestamp":1645102447075,"user_tz":-300,"elapsed":644,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"d631acd5-84d5-454c-b731-066948675442"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n","\n"," The Boston housing prices dataset has an ethical problem. 
You can refer to\n"," the documentation of this function for further details.\n","\n"," The scikit-learn maintainers therefore strongly discourage the use of this\n"," dataset unless the purpose of the code is to study and educate about\n"," ethical issues in data science and machine learning.\n","\n"," In this special case, you can fetch the dataset from the original\n"," source::\n","\n"," import pandas as pd\n"," import numpy as np\n","\n","\n"," data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n"," raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n"," data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n"," target = raw_df.values[1::2, 2]\n","\n"," Alternative datasets include the California housing dataset (i.e.\n"," :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n"," dataset. You can load the datasets as follows::\n","\n"," from sklearn.datasets import fetch_california_housing\n"," housing = fetch_california_housing()\n","\n"," for the California housing dataset and::\n","\n"," from sklearn.datasets import fetch_openml\n"," housing = fetch_openml(name=\"house_prices\", as_frame=True)\n","\n"," for the Ames housing dataset.\n"," \n"," warnings.warn(msg, category=FutureWarning)\n"]},{"output_type":"execute_result","data":{"text/plain":["dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename', 'data_module'])"]},"metadata":{},"execution_count":2}],"source":["#Загрузите \"Boston House Prices dataset\" из встроенных наборов данных библиотеки sklearn. 
\n","from sklearn.datasets import load_boston\n","boston = load_boston()\n","boston.keys()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"biPusKfIvpt2","executionInfo":{"status":"ok","timestamp":1645102482478,"user_tz":-300,"elapsed":680,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"6ef83c33-828a-4c8b-a379-613d828516bc"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["(506, 13)"]},"metadata":{},"execution_count":3}],"source":["#Создайте датафреймы X и y из этих данных.\n","data = boston[\"data\"]\n","data.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BqL6OHzovpt7","executionInfo":{"status":"ok","timestamp":1645102496787,"user_tz":-300,"elapsed":582,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"5a3f82f5-1421-435a-de64-6c14aa6b036a"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',\n"," 'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.98
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.14
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.03
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.94
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.33
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n"," \n"," "],"text/plain":[" CRIM ZN INDUS CHAS NOX ... RAD TAX PTRATIO B LSTAT\n","0 0.00632 18.0 2.31 0.0 0.538 ... 1.0 296.0 15.3 396.90 4.98\n","1 0.02731 0.0 7.07 0.0 0.469 ... 2.0 242.0 17.8 396.90 9.14\n","2 0.02729 0.0 7.07 0.0 0.469 ... 2.0 242.0 17.8 392.83 4.03\n","3 0.03237 0.0 2.18 0.0 0.458 ... 3.0 222.0 18.7 394.63 2.94\n","4 0.06905 0.0 2.18 0.0 0.458 ... 3.0 222.0 18.7 396.90 5.33\n","\n","[5 rows x 13 columns]"]},"metadata":{},"execution_count":7}],"source":["X = pd.DataFrame(data, columns=feature_names)\n","X.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"44nL6Sa9vpuH","executionInfo":{"status":"ok","timestamp":1645102602224,"user_tz":-300,"elapsed":494,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"755e9f26-8f7c-4a23-bfd5-7d487ba5dd87"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 506 entries, 0 to 505\n","Data columns (total 13 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 CRIM 506 non-null float64\n"," 1 ZN 506 non-null float64\n"," 2 INDUS 506 non-null float64\n"," 3 CHAS 506 non-null float64\n"," 4 NOX 506 non-null float64\n"," 5 RM 506 non-null float64\n"," 6 AGE 506 non-null float64\n"," 7 DIS 506 non-null float64\n"," 8 RAD 506 non-null float64\n"," 9 TAX 506 non-null float64\n"," 10 PTRATIO 506 non-null float64\n"," 11 B 506 non-null float64\n"," 12 LSTAT 506 non-null float64\n","dtypes: float64(13)\n","memory usage: 51.5 KB\n"]}],"source":["X.info()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QX6NtkRvvpuL","executionInfo":{"status":"ok","timestamp":1645102631315,"user_tz":-300,"elapsed":857,"user":{"displayName":"Сергей 
Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"e3de32df-8986-4d87-ddfe-4dd178f79bca"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n","RangeIndex: 506 entries, 0 to 505\n","Data columns (total 1 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 price 506 non-null float64\n","dtypes: float64(1)\n","memory usage: 4.1 KB\n"]}],"source":["y = pd.DataFrame(target, columns=[\"price\"])\n","y.info()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"J_pVV9ynvpuQ"},"outputs":[],"source":["#Разбейте эти датафреймы на тренировочные (X_train, y_train) и тестовые (X_test, y_test) \n","#с помощью функции train_test_split так, чтобы размер тестовой выборки составлял 30% от \n","#всех данных, при этом аргумент random_state должен быть равен 42. \n","from sklearn.model_selection import train_test_split\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xK_P8XD7vpuX"},"outputs":[],"source":["#Создайте модель линейной регрессии под названием lr с помощью класса \n","#LinearRegression из модуля sklearn.linear_model. 
\n","from sklearn.linear_model import LinearRegression\n","lr = LinearRegression()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"KZrt-xrwvpub","executionInfo":{"status":"ok","timestamp":1645102675535,"user_tz":-300,"elapsed":581,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"8e18ae4d-9407-496a-8a61-08b62d90fb67"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["LinearRegression()"]},"metadata":{},"execution_count":12}],"source":["#Обучите модель на тренировочных данных (используйте все признаки) и сделайте предсказание на тестовых.\n","lr.fit(X_train, y_train)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wuW0wldkvpue","executionInfo":{"status":"ok","timestamp":1645102686553,"user_tz":-300,"elapsed":518,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"96769c07-009e-4da9-c891-b231195193fd"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["(152, 1)"]},"metadata":{},"execution_count":13}],"source":["y_pred = lr.predict(X_test)\n","y_pred.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":363},"id":"gIDIpPrevpui","executionInfo":{"status":"ok","timestamp":1645102693702,"user_tz":-300,"elapsed":623,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"2d7a1230-e409-4e1f-f87d-9420a8d5e873"},"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
y_testy_pred
9222.928.998112
36025.023.472965
3877.46.862435
4897.06.479228
30728.233.423096
28822.327.306120
45216.118.793646
20348.541.576196
23625.129.342199
4005.612.654184
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" y_test y_pred\n","92 22.9 28.998112\n","360 25.0 23.472965\n","387 7.4 6.862435\n","489 7.0 6.479228\n","307 28.2 33.423096\n","288 22.3 27.306120\n","452 16.1 18.793646\n","203 48.5 41.576196\n","236 25.1 29.342199\n","400 5.6 12.654184"]},"metadata":{},"execution_count":14}],"source":["check_test = pd.DataFrame({\n"," \"y_test\": y_test[\"price\"],\n"," \"y_pred\": y_pred.flatten(),\n","})\n","\n","check_test.head(10)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"DRKc79e4vpuk","executionInfo":{"status":"ok","timestamp":1645102705122,"user_tz":-300,"elapsed":637,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"f92678e2-9fbc-4dea-8f27-3d99c7649566"},"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
y_testy_prederror
9222.928.9981126.098112
36025.023.472965-1.527035
3877.46.862435-0.537565
4897.06.479228-0.520772
30728.233.4230965.223096
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" y_test y_pred error\n","92 22.9 28.998112 6.098112\n","360 25.0 23.472965 -1.527035\n","387 7.4 6.862435 -0.537565\n","489 7.0 6.479228 -0.520772\n","307 28.2 33.423096 5.223096"]},"metadata":{},"execution_count":15}],"source":["#Вычислите R2 полученных предказаний с помощью r2_score из модуля sklearn.metrics.\n","check_test[\"error\"] = check_test[\"y_pred\"] - check_test[\"y_test\"]\n","check_test.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gXSMLW8Qvpun","executionInfo":{"status":"ok","timestamp":1645102725363,"user_tz":-300,"elapsed":586,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"f9e307f1-b8ca-47d4-a259-8aac6acc3944"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.6436988807960613"]},"metadata":{},"execution_count":16}],"source":["from sklearn.metrics import r2_score\n","r2_score_1=r2_score(check_test[\"y_pred\"], check_test[\"y_test\"])\n","r2_score_1"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"0DzYfsSGvpup"},"outputs":[],"source":["#2. Создайте модель под названием model с помощью RandomForestRegressor из модуля sklearn.ensemble.\n","#Сделайте агрумент n_estimators равным 1000, max_depth должен быть равен 12 и random_state сделайте равным 42. 
\n","from sklearn.ensemble import RandomForestRegressor\n","model = RandomForestRegressor(n_estimators=1000, max_depth=12, random_state=42)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Dk2V8hhwvpus","executionInfo":{"status":"ok","timestamp":1645102745210,"user_tz":-300,"elapsed":2797,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"0246f5c8-5d5f-4ee1-9e20-5a6e25f1363e"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["RandomForestRegressor(max_depth=12, n_estimators=1000, random_state=42)"]},"metadata":{},"execution_count":18}],"source":["#Обучите модель на тренировочных данных аналогично тому, как вы обучали модель LinearRegression, \n","#но при этом в метод fit вместо датафрейма y_train поставьте y_train.values[:, 0], чтобы получить \n","#из датафрейма одномерный массив Numpy, так как для класса RandomForestRegressor в данном методе \n","#для аргумента y предпочтительно применение массивов вместо датафрейма.\n","model.fit(X_train, y_train.values[:, 0])"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iUQM3rgdvpuw","outputId":"7927a90b-3b43-4694-a7b8-787e0e4e9720"},"outputs":[{"data":{"text/plain":["(152,)"]},"execution_count":18,"metadata":{},"output_type":"execute_result"}],"source":["#Сделайте предсказание на тестовых данных и посчитайте R2.\n","y_pred = model.predict(X_test)\n","y_pred.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":363},"id":"_fspTr2qvpuz","executionInfo":{"status":"ok","timestamp":1645102751782,"user_tz":-300,"elapsed":630,"user":{"displayName":"Сергей 
Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"ee59cb85-4482-4df6-c7d0-bf4602900682"},"outputs":[{"output_type":"execute_result","data":{"text/html":["\n","
\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
y_testy_pred
9222.928.998112
36025.023.472965
3877.46.862435
4897.06.479228
30728.233.423096
28822.327.306120
45216.118.793646
20348.541.576196
23625.129.342199
4005.612.654184
\n","
\n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "],"text/plain":[" y_test y_pred\n","92 22.9 28.998112\n","360 25.0 23.472965\n","387 7.4 6.862435\n","489 7.0 6.479228\n","307 28.2 33.423096\n","288 22.3 27.306120\n","452 16.1 18.793646\n","203 48.5 41.576196\n","236 25.1 29.342199\n","400 5.6 12.654184"]},"metadata":{},"execution_count":19}],"source":["check_test = pd.DataFrame({\n"," \"y_test\": y_test[\"price\"],\n"," \"y_pred\": y_pred.flatten(),\n","})\n","\n","check_test.head(10)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qtKz0tvsvpu2","executionInfo":{"status":"ok","timestamp":1645102763418,"user_tz":-300,"elapsed":940,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"9a487e98-5aa0-40be-fab2-550493567ce3"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.6436988807960613"]},"metadata":{},"execution_count":20}],"source":["r2_score_2=r2_score(check_test[\"y_pred\"], check_test[\"y_test\"])\n","r2_score_2"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"r368oFU1vpu5","executionInfo":{"status":"ok","timestamp":1645102766795,"user_tz":-300,"elapsed":17,"user":{"displayName":"Сергей Шибанов","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64","userId":"00971423091508291527"}},"outputId":"515b6ba6-522a-4b1b-f7cc-2725783da9cb"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["False"]},"metadata":{},"execution_count":21}],"source":["#Сравните с результатом из предыдущего задания.\n","#Напишите в комментариях к коду, какая модель в данном случае работает лучше.\n","r2_score_1\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.363787...-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425...-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.514654...0.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024...-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.817739...-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
\n","

5 rows × 31 columns

\n",""],"text/plain":[" Time V1 V2 V3 V4 V5 V6 V7 \\\n","0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n","1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n","2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n","3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n","4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n","\n"," V8 V9 ... V21 V22 V23 V24 V25 \\\n","0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n","1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n","2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n","3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n","4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n","\n"," V26 V27 V28 Amount Class \n","0 -0.189115 0.133558 -0.021053 149.62 0 \n","1 0.125895 -0.008983 0.014724 2.69 0 \n","2 -0.139097 -0.055353 -0.059752 378.66 0 \n","3 -0.221929 0.062723 0.061458 123.50 0 \n","4 0.502292 0.219422 0.215153 69.99 0 \n","\n","[5 rows x 31 columns]"]},"execution_count":30,"metadata":{},"output_type":"execute_result"}],"source":["#Загрузите датасет creditcard.csv и создайте датафрейм df.\n","df = pd.read_csv('creditcard.csv')\n","df.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZKI6KQ9PvpvJ","outputId":"ddb8fa72-c690-4769-eb8b-93a503c012a8"},"outputs":[{"data":{"text/plain":["0 0.998273\n","1 0.001727\n","Name: Class, dtype: float64"]},"execution_count":31,"metadata":{},"output_type":"execute_result"}],"source":["#С помощью метода value_counts с аргументом normalize=True убедитесь в том, что выборка 
несбалансирована.\n","df[\"Class\"].value_counts(normalize=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-nD44QtOvpvL","outputId":"08424e0c-28df-4d34-9421-9c0b94390f08"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 284807 entries, 0 to 284806\n","Data columns (total 31 columns):\n","Time 284807 non-null float64\n","V1 284807 non-null float64\n","V2 284807 non-null float64\n","V3 284807 non-null float64\n","V4 284807 non-null float64\n","V5 284807 non-null float64\n","V6 284807 non-null float64\n","V7 284807 non-null float64\n","V8 284807 non-null float64\n","V9 284807 non-null float64\n","V10 284807 non-null float64\n","V11 284807 non-null float64\n","V12 284807 non-null float64\n","V13 284807 non-null float64\n","V14 284807 non-null float64\n","V15 284807 non-null float64\n","V16 284807 non-null float64\n","V17 284807 non-null float64\n","V18 284807 non-null float64\n","V19 284807 non-null float64\n","V20 284807 non-null float64\n","V21 284807 non-null float64\n","V22 284807 non-null float64\n","V23 284807 non-null float64\n","V24 284807 non-null float64\n","V25 284807 non-null float64\n","V26 284807 non-null float64\n","V27 284807 non-null float64\n","V28 284807 non-null float64\n","Amount 284807 non-null float64\n","Class 284807 non-null int64\n","dtypes: float64(30), int64(1)\n","memory usage: 67.4 MB\n"]}],"source":["#Используя метод info, проверьте, все ли столбцы содержат числовые данные\n","df.info()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"K4BQ67jjvpvM","outputId":"a0000c45-4a9e-41da-bd7f-3e263779142e"},"outputs":[{"data":{"text/plain":["Time 0\n","V1 0\n","V2 0\n","V3 0\n","V4 0\n","V5 0\n","V6 0\n","V7 0\n","V8 0\n","V9 0\n","V10 0\n","V11 0\n","V12 0\n","V13 0\n","V14 0\n","V15 0\n","V16 0\n","V17 0\n","V18 0\n","V19 0\n","V20 0\n","V21 0\n","V22 0\n","V23 0\n","V24 0\n","V25 0\n","V26 0\n","V27 0\n","V28 0\n","Amount 0\n","Class 0\n","dtype: 
int32"]},"execution_count":33,"metadata":{},"output_type":"execute_result"}],"source":["#и нет ли в них пропусков\n","df.isnull().astype(np.int).sum().astype(np.int)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"BqwtVoU8vpvN"},"outputs":[],"source":["#Примените следующую настройку, чтобы можно было просматривать все столбцы датафрейма: \n","#pd.options.display.max_columns = 100.\n","pd.options.display.max_columns = 100"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"6IKKcAiSvpvO","outputId":"6c887466-9f4d-4e1f-83f2-f41722364aea"},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.3637870.090794-0.551600-0.617801-0.991390-0.3111691.468177-0.4704010.2079710.0257910.4039930.251412-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425-0.1669741.6127271.0652350.489095-0.1437720.6355580.463917-0.114805-0.183361-0.145783-0.069083-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.5146540.2076430.6245010.0660840.717293-0.1659462.345865-2.8900831.109969-0.121359-2.2618570.5249800.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024-0.054952-0.2264870.1782280.507757-0.287924-0.631418-1.059647-0.6840931.965775-1.232622-0.208038-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.8177390.753074-0.8228430.5381961.345852-1.1196700.175121-0.451449-0.237033-0.0381950.8034870.408542-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
52.0-0.4259660.9605231.141109-0.1682520.420987-0.0297280.4762010.260314-0.568671-0.3714071.3412620.359894-0.358091-0.1371340.5176170.401726-0.0581330.068653-0.0331940.084968-0.208254-0.559825-0.026398-0.371427-0.2327940.1059150.2538440.0810803.670
64.01.2296580.1410040.0453711.2026130.1918810.272708-0.0051590.0812130.464960-0.099254-1.416907-0.153826-0.7510630.1673720.050144-0.4435870.002821-0.611987-0.045575-0.219633-0.167716-0.270710-0.154104-0.7800550.750137-0.2572370.0345070.0051684.990
77.0-0.6442691.4179641.074380-0.4921990.9489340.4281181.120631-3.8078640.6153751.249376-0.6194680.2914741.757964-1.3238650.686133-0.076127-1.222127-0.3582220.324505-0.1567421.943465-1.0154550.057504-0.649709-0.415267-0.051634-1.206921-1.08533940.800
87.0-0.8942860.286157-0.113192-0.2715262.6695993.7218180.3701450.851084-0.392048-0.410430-0.705117-0.110452-0.2862540.074355-0.328783-0.210077-0.4997680.1187650.5703280.052736-0.073425-0.268092-0.2042331.0115920.373205-0.3841570.0117470.14240493.200
99.0-0.3382621.1195931.044367-0.2221870.499361-0.2467610.6515830.069539-0.736727-0.3668461.0176140.8363901.006844-0.4435230.1502190.739453-0.5409800.4766770.4517730.203711-0.246914-0.633753-0.120794-0.385050-0.0697330.0941990.2462190.0830763.680
\n","
"],"text/plain":[" Time V1 V2 V3 V4 V5 V6 V7 \\\n","0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n","1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n","2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n","3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n","4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n","5 2.0 -0.425966 0.960523 1.141109 -0.168252 0.420987 -0.029728 0.476201 \n","6 4.0 1.229658 0.141004 0.045371 1.202613 0.191881 0.272708 -0.005159 \n","7 7.0 -0.644269 1.417964 1.074380 -0.492199 0.948934 0.428118 1.120631 \n","8 7.0 -0.894286 0.286157 -0.113192 -0.271526 2.669599 3.721818 0.370145 \n","9 9.0 -0.338262 1.119593 1.044367 -0.222187 0.499361 -0.246761 0.651583 \n","\n"," V8 V9 V10 V11 V12 V13 V14 \\\n","0 0.098698 0.363787 0.090794 -0.551600 -0.617801 -0.991390 -0.311169 \n","1 0.085102 -0.255425 -0.166974 1.612727 1.065235 0.489095 -0.143772 \n","2 0.247676 -1.514654 0.207643 0.624501 0.066084 0.717293 -0.165946 \n","3 0.377436 -1.387024 -0.054952 -0.226487 0.178228 0.507757 -0.287924 \n","4 -0.270533 0.817739 0.753074 -0.822843 0.538196 1.345852 -1.119670 \n","5 0.260314 -0.568671 -0.371407 1.341262 0.359894 -0.358091 -0.137134 \n","6 0.081213 0.464960 -0.099254 -1.416907 -0.153826 -0.751063 0.167372 \n","7 -3.807864 0.615375 1.249376 -0.619468 0.291474 1.757964 -1.323865 \n","8 0.851084 -0.392048 -0.410430 -0.705117 -0.110452 -0.286254 0.074355 \n","9 0.069539 -0.736727 -0.366846 1.017614 0.836390 1.006844 -0.443523 \n","\n"," V15 V16 V17 V18 V19 V20 V21 \\\n","0 1.468177 -0.470401 0.207971 0.025791 0.403993 0.251412 -0.018307 \n","1 0.635558 0.463917 -0.114805 -0.183361 -0.145783 -0.069083 -0.225775 \n","2 2.345865 -2.890083 1.109969 -0.121359 -2.261857 0.524980 0.247998 \n","3 -0.631418 -1.059647 -0.684093 1.965775 -1.232622 -0.208038 -0.108300 \n","4 0.175121 -0.451449 -0.237033 -0.038195 0.803487 0.408542 -0.009431 
\n","5 0.517617 0.401726 -0.058133 0.068653 -0.033194 0.084968 -0.208254 \n","6 0.050144 -0.443587 0.002821 -0.611987 -0.045575 -0.219633 -0.167716 \n","7 0.686133 -0.076127 -1.222127 -0.358222 0.324505 -0.156742 1.943465 \n","8 -0.328783 -0.210077 -0.499768 0.118765 0.570328 0.052736 -0.073425 \n","9 0.150219 0.739453 -0.540980 0.476677 0.451773 0.203711 -0.246914 \n","\n"," V22 V23 V24 V25 V26 V27 V28 \\\n","0 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 \n","1 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 \n","2 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 \n","3 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 \n","4 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 \n","5 -0.559825 -0.026398 -0.371427 -0.232794 0.105915 0.253844 0.081080 \n","6 -0.270710 -0.154104 -0.780055 0.750137 -0.257237 0.034507 0.005168 \n","7 -1.015455 0.057504 -0.649709 -0.415267 -0.051634 -1.206921 -1.085339 \n","8 -0.268092 -0.204233 1.011592 0.373205 -0.384157 0.011747 0.142404 \n","9 -0.633753 -0.120794 -0.385050 -0.069733 0.094199 0.246219 0.083076 \n","\n"," Amount Class \n","0 149.62 0 \n","1 2.69 0 \n","2 378.66 0 \n","3 123.50 0 \n","4 69.99 0 \n","5 3.67 0 \n","6 4.99 0 \n","7 40.80 0 \n","8 93.20 0 \n","9 3.68 0 "]},"execution_count":35,"metadata":{},"output_type":"execute_result"}],"source":["#Просмотрите первые 10 строк датафрейма df.\n","df.head(10)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"gWS6n9-CvpvR","outputId":"f85ad98a-dede-415a-84c4-57fbde41ab6c"},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28Amount
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.3637870.090794-0.551600-0.617801-0.991390-0.3111691.468177-0.4704010.2079710.0257910.4039930.251412-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.62
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425-0.1669741.6127271.0652350.489095-0.1437720.6355580.463917-0.114805-0.183361-0.145783-0.069083-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.69
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.5146540.2076430.6245010.0660840.717293-0.1659462.345865-2.8900831.109969-0.121359-2.2618570.5249800.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.66
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024-0.054952-0.2264870.1782280.507757-0.287924-0.631418-1.059647-0.6840931.965775-1.232622-0.208038-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.50
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.8177390.753074-0.8228430.5381961.345852-1.1196700.175121-0.451449-0.237033-0.0381950.8034870.408542-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.99
\n","
"],"text/plain":[" Time V1 V2 V3 V4 V5 V6 V7 \\\n","0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n","1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n","2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n","3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n","4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n","\n"," V8 V9 V10 V11 V12 V13 V14 \\\n","0 0.098698 0.363787 0.090794 -0.551600 -0.617801 -0.991390 -0.311169 \n","1 0.085102 -0.255425 -0.166974 1.612727 1.065235 0.489095 -0.143772 \n","2 0.247676 -1.514654 0.207643 0.624501 0.066084 0.717293 -0.165946 \n","3 0.377436 -1.387024 -0.054952 -0.226487 0.178228 0.507757 -0.287924 \n","4 -0.270533 0.817739 0.753074 -0.822843 0.538196 1.345852 -1.119670 \n","\n"," V15 V16 V17 V18 V19 V20 V21 \\\n","0 1.468177 -0.470401 0.207971 0.025791 0.403993 0.251412 -0.018307 \n","1 0.635558 0.463917 -0.114805 -0.183361 -0.145783 -0.069083 -0.225775 \n","2 2.345865 -2.890083 1.109969 -0.121359 -2.261857 0.524980 0.247998 \n","3 -0.631418 -1.059647 -0.684093 1.965775 -1.232622 -0.208038 -0.108300 \n","4 0.175121 -0.451449 -0.237033 -0.038195 0.803487 0.408542 -0.009431 \n","\n"," V22 V23 V24 V25 V26 V27 V28 \\\n","0 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 \n","1 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 \n","2 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 \n","3 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 \n","4 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 \n","\n"," Amount \n","0 149.62 \n","1 2.69 \n","2 378.66 \n","3 123.50 \n","4 69.99 "]},"execution_count":36,"metadata":{},"output_type":"execute_result"}],"source":["#Создайте датафрейм X из датафрейма df, исключив столбец Class.\n","X = df.drop(\"Class\", 
axis=1)\n","X.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"oeJlki8zvpvT","outputId":"cdea01bf-047f-4c04-82f2-d84661d0cce8"},"outputs":[{"data":{"text/plain":["0 0\n","1 0\n","2 0\n","3 0\n","4 0\n","Name: Class, dtype: int64"]},"execution_count":37,"metadata":{},"output_type":"execute_result"}],"source":["#Создайте объект Series под названием y из столбца Class.\n","y = df[\"Class\"]\n","y.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jUtviSb5vpvU"},"outputs":[],"source":["#Разбейте X и y на тренировочный и тестовый наборы данных при помощи функции train_test_split, \n","#используя аргументы: test_size=0.3, random_state=100, stratify=y.\n","#У вас должны получиться объекты X_train, X_test, y_train и y_test.\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100, stratify=y)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"mKjD8fYnvpvV","outputId":"20f76527-3824-47ca-8b76-5af8ba856a95"},"outputs":[{"data":{"text/plain":["((199364, 30), (85443, 30), (199364,), (85443,))"]},"execution_count":39,"metadata":{},"output_type":"execute_result"}],"source":["#Просмотрите информацию о их форме.\n","X_train.shape, X_test.shape, y_train.shape, y_test.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PeODdXarvpvW"},"outputs":[],"source":["#Для поиска по сетке параметров задайте такие параметры:\n","#parameters = [{'n_estimators': [10, 15], \n","#'max_features': np.arange(3, 5),\n","#'max_depth': np.arange(4, 7)}]\n","parameters = [{'n_estimators': [10, 15], \n","'max_features': np.arange(3, 5),\n","'max_depth': np.arange(4, 7)}]\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"TOj3o830vpvX"},"outputs":[],"source":["#Создайте модель GridSearchCV со следующими аргументами:\n","#estimator=RandomForestClassifier(random_state=100), \n","#param_grid=parameters,\n","#scoring='roc_auc',\n","#cv=3.\n","clf = GridSearchCV(\n"," 
estimator=RandomForestClassifier(random_state=100),\n"," param_grid=parameters,\n"," scoring='roc_auc',\n"," cv=3,\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"LL15ShxtvpvZ","outputId":"c4501d3d-a033-4be4-cf75-37509c37aa81"},"outputs":[{"data":{"text/plain":["GridSearchCV(cv=3, error_score='raise-deprecating',\n"," estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n"," max_depth=None, max_features='auto', max_leaf_nodes=None,\n"," min_impurity_decrease=0.0, min_impurity_split=None,\n"," min_samples_leaf=1, min_samples_split=2,\n"," min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,\n"," oob_score=False, random_state=100, verbose=0, warm_start=False),\n"," fit_params=None, iid='warn', n_jobs=None,\n"," param_grid=[{'n_estimators': [10, 15], 'max_features': array([3, 4]), 'max_depth': array([4, 5, 6])}],\n"," pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n"," scoring='roc_auc', verbose=0)"]},"execution_count":42,"metadata":{},"output_type":"execute_result"}],"source":["#Обучите модель на тренировочном наборе данных (может занять несколько минут).\n","clf.fit(X_train, y_train)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5pB3n-okvpva","outputId":"214555a6-70b7-45a0-8e45-082284913456"},"outputs":[{"data":{"text/plain":["{'max_depth': 6, 'max_features': 3, 'n_estimators': 15}"]},"execution_count":43,"metadata":{},"output_type":"execute_result"}],"source":["#Просмотрите параметры лучшей модели с помощью атрибута best_params_.\n","clf.best_params_"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Q2KYdOYmvpvb","outputId":"e405a7e4-0831-40fb-f836-a0fb349a0f53"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[9.99070828e-01 9.29171738e-04]\n"," [9.99704794e-01 2.95206364e-04]\n"," [9.99717846e-01 2.82154033e-04]\n"," [9.99717846e-01 2.82154033e-04]\n"," [9.99717846e-01 2.82154033e-04]\n"," [9.99717846e-01 2.82154033e-04]\n"," 
[9.99717846e-01 2.82154033e-04]\n"," [9.99717846e-01 2.82154033e-04]\n"," [9.99717846e-01 2.82154033e-04]\n"," [9.99717846e-01 2.82154033e-04]]\n"]}],"source":["#Предскажите вероятности классов с помощью полученнной модели и метода predict_proba.\n","y_pred_proba = clf.predict_proba(X_test)\n","print(y_pred_proba[:10])"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PdRiplGRvpvc","outputId":"4b853da7-a0f8-4689-bba8-e7dfa193d71c"},"outputs":[{"name":"stdout","output_type":"stream","text":["[0.00092917 0.00029521 0.00028215 0.00028215 0.00028215]\n"]}],"source":["#Из полученного результата (массив Numpy) выберите столбец с индексом 1 (вероятность класса 1) \n","#и запишите в массив y_pred_proba.\n","y_pred_proba = y_pred_proba[:, 1]\n","print(y_pred_proba[:5])"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Mry7O1_Avpvd"},"outputs":[],"source":["#Из модуля sklearn.metrics импортируйте метрику roc_auc_score.\n","from sklearn.metrics import roc_auc_score"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yj5PuZXKvpve","outputId":"2c545ebf-3874-448b-a12a-f516e25e6ac5"},"outputs":[{"data":{"text/plain":["0.9462664156037156"]},"execution_count":47,"metadata":{},"output_type":"execute_result"}],"source":["#Вычислите AUC на тестовых данных и сравните с результатом, \n","#полученным на тренировочных данных, используя в качестве аргументов\n","#массивы y_test и y_pred_proba.\n","roc_auc_score(y_test, y_pred_proba)"]},{"cell_type":"markdown","metadata":{"id":"5jmj2gM_vpvg"},"source":["####Дополнительные задания:"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"W9__mHwVvpvn","outputId":"1e28e2a8-8544-4b4a-9f7f-fbba3d3d3c5d"},"outputs":[{"data":{"text/plain":["dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])"]},"execution_count":48,"metadata":{},"output_type":"execute_result"}],"source":["#Загрузите датасет Wine из встроенных датасетов sklearn.datasets с помощью функции load_wine в переменную 
data.\n","#Полученный датасет не является датафреймом. Это структура данных, имеющая ключи аналогично словарю. \n","from sklearn.datasets import load_wine\n","data = load_wine()\n","data.keys()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"uI-vDg-ivpvp","outputId":"2a0c391e-d6c3-489d-b574-58a36c821e15"},"outputs":[{"data":{"text/plain":["sklearn.utils.Bunch"]},"execution_count":49,"metadata":{},"output_type":"execute_result"}],"source":["#Просмотрите тип данных этой структуры данных\n","type(data)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"c83d-aNUvpvq","outputId":"5babde0e-fafe-46ac-d90d-3cfeff235dfb"},"outputs":[{"data":{"text/plain":["['alcohol',\n"," 'malic_acid',\n"," 'ash',\n"," 'alcalinity_of_ash',\n"," 'magnesium',\n"," 'total_phenols',\n"," 'flavanoids',\n"," 'nonflavanoid_phenols',\n"," 'proanthocyanins',\n"," 'color_intensity',\n"," 'hue',\n"," 'od280/od315_of_diluted_wines',\n"," 'proline']"]},"execution_count":50,"metadata":{},"output_type":"execute_result"}],"source":["#и создайте список data_keys, содержащий ее ключи.\n","data_keys=data[\"feature_names\"]\n","data_keys"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"XS0lGGuIvpvs","outputId":"24ce1d20-dd97-4588-c56a-16041e26eb83"},"outputs":[{"data":{"text/plain":["array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,\n"," 1.065e+03],\n"," [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,\n"," 1.050e+03],\n"," [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,\n"," 1.185e+03],\n"," ...,\n"," [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,\n"," 8.350e+02],\n"," [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,\n"," 8.400e+02],\n"," [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,\n"," 5.600e+02]])"]},"execution_count":51,"metadata":{},"output_type":"execute_result"}],"source":["#Просмотрите 
данные\n","data.data"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"E13dmX7kvpvt","outputId":"24239ff3-d99d-4dd0-e1a7-56d1175a8d13"},"outputs":[{"data":{"text/plain":["(178, 13)"]},"execution_count":52,"metadata":{},"output_type":"execute_result"}],"source":["data.data.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JOYCfMA1vpvu","outputId":"7724e237-c0e0-41aa-bf3c-4be4005a5b4a"},"outputs":[{"name":"stdout","output_type":"stream","text":[".. _wine_dataset:\n","\n","Wine recognition dataset\n","------------------------\n","\n","**Data Set Characteristics:**\n","\n"," :Number of Instances: 178 (50 in each of three classes)\n"," :Number of Attributes: 13 numeric, predictive attributes and the class\n"," :Attribute Information:\n"," \t\t- Alcohol\n"," \t\t- Malic acid\n"," \t\t- Ash\n","\t\t- Alcalinity of ash \n"," \t\t- Magnesium\n","\t\t- Total phenols\n"," \t\t- Flavanoids\n"," \t\t- Nonflavanoid phenols\n"," \t\t- Proanthocyanins\n","\t\t- Color intensity\n"," \t\t- Hue\n"," \t\t- OD280/OD315 of diluted wines\n"," \t\t- Proline\n","\n"," - class:\n"," - class_0\n"," - class_1\n"," - class_2\n","\t\t\n"," :Summary Statistics:\n"," \n"," ============================= ==== ===== ======= =====\n"," Min Max Mean SD\n"," ============================= ==== ===== ======= =====\n"," Alcohol: 11.0 14.8 13.0 0.8\n"," Malic Acid: 0.74 5.80 2.34 1.12\n"," Ash: 1.36 3.23 2.36 0.27\n"," Alcalinity of Ash: 10.6 30.0 19.5 3.3\n"," Magnesium: 70.0 162.0 99.7 14.3\n"," Total Phenols: 0.98 3.88 2.29 0.63\n"," Flavanoids: 0.34 5.08 2.03 1.00\n"," Nonflavanoid Phenols: 0.13 0.66 0.36 0.12\n"," Proanthocyanins: 0.41 3.58 1.59 0.57\n"," Colour Intensity: 1.3 13.0 5.1 2.3\n"," Hue: 0.48 1.71 0.96 0.23\n"," OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71\n"," Proline: 278 1680 746 315\n"," ============================= ==== ===== ======= =====\n","\n"," :Missing Attribute Values: None\n"," :Class Distribution: class_0 (59), class_1 (71), class_2 
(48)\n"," :Creator: R.A. Fisher\n"," :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n"," :Date: July, 1988\n","\n","This is a copy of UCI ML Wine recognition datasets.\n","https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n","\n","The data is the results of a chemical analysis of wines grown in the same\n","region in Italy by three different cultivators. There are thirteen different\n","measurements taken for different constituents found in the three types of\n","wine.\n","\n","Original Owners: \n","\n","Forina, M. et al, PARVUS - \n","An Extendible Package for Data Exploration, Classification and Correlation. \n","Institute of Pharmaceutical and Food Analysis and Technologies,\n","Via Brigata Salerno, 16147 Genoa, Italy.\n","\n","Citation:\n","\n","Lichman, M. (2013). UCI Machine Learning Repository\n","[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,\n","School of Information and Computer Science. \n","\n",".. topic:: References\n","\n"," (1) S. Aeberhard, D. Coomans and O. de Vel, \n"," Comparison of Classifiers in High Dimensional Settings, \n"," Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of \n"," Mathematics and Statistics, James Cook University of North Queensland. \n"," (Also submitted to Technometrics). \n","\n"," The data was used with many others for comparing various \n"," classifiers. The classes are separable, though only RDA \n"," has achieved 100% correct classification. \n"," (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) \n"," (All results using the leave-one-out technique) \n","\n"," (2) S. Aeberhard, D. Coomans and O. de Vel, \n"," \"THE CLASSIFICATION PERFORMANCE OF RDA\" \n"," Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of \n"," Mathematics and Statistics, James Cook University of North Queensland. \n"," (Also submitted to Journal of Chemometrics).\n","\n"]}],"source":["#описание и названия признаков в датасете. 
Описание нужно вывести в виде привычного, аккуратно \n","#оформленного текста, без обозначений переноса строки, но с самими переносами и т. д.\n","for line in data.DESCR.split('\\n'):\n"," print(line)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"RqPda_RJvpvw","outputId":"6b4daa2c-1010-430d-e792-01851c0ec438"},"outputs":[{"name":"stdout","output_type":"stream","text":[".. _wine_dataset:\n","\n","Wine recognition dataset\n","------------------------\n","\n","**Data Set Characteristics:**\n","\n"," :Number of Instances: 178 (50 in each of three classes)\n"," :Number of Attributes: 13 numeric, predictive attributes and the class\n"," :Attribute Information:\n"," \t\t- Alcohol\n"," \t\t- Malic acid\n"," \t\t- Ash\n","\t\t- Alcalinity of ash \n"," \t\t- Magnesium\n","\t\t- Total phenols\n"," \t\t- Flavanoids\n"," \t\t- Nonflavanoid phenols\n"," \t\t- Proanthocyanins\n","\t\t- Color intensity\n"," \t\t- Hue\n"," \t\t- OD280/OD315 of diluted wines\n"," \t\t- Proline\n","\n"," - class:\n"," - class_0\n"," - class_1\n"," - class_2\n","\t\t\n"," :Summary Statistics:\n"," \n"," ============================= ==== ===== ======= =====\n"," Min Max Mean SD\n"," ============================= ==== ===== ======= =====\n"," Alcohol: 11.0 14.8 13.0 0.8\n"," Malic Acid: 0.74 5.80 2.34 1.12\n"," Ash: 1.36 3.23 2.36 0.27\n"," Alcalinity of Ash: 10.6 30.0 19.5 3.3\n"," Magnesium: 70.0 162.0 99.7 14.3\n"," Total Phenols: 0.98 3.88 2.29 0.63\n"," Flavanoids: 0.34 5.08 2.03 1.00\n"," Nonflavanoid Phenols: 0.13 0.66 0.36 0.12\n"," Proanthocyanins: 0.41 3.58 1.59 0.57\n"," Colour Intensity: 1.3 13.0 5.1 2.3\n"," Hue: 0.48 1.71 0.96 0.23\n"," OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71\n"," Proline: 278 1680 746 315\n"," ============================= ==== ===== ======= =====\n","\n"," :Missing Attribute Values: None\n"," :Class Distribution: class_0 (59), class_1 (71), class_2 (48)\n"," :Creator: R.A. 
Fisher\n"," :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n"," :Date: July, 1988\n","\n","This is a copy of UCI ML Wine recognition datasets.\n","https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n","\n","The data is the results of a chemical analysis of wines grown in the same\n","region in Italy by three different cultivators. There are thirteen different\n","measurements taken for different constituents found in the three types of\n","wine.\n","\n","Original Owners: \n","\n","Forina, M. et al, PARVUS - \n","An Extendible Package for Data Exploration, Classification and Correlation. \n","Institute of Pharmaceutical and Food Analysis and Technologies,\n","Via Brigata Salerno, 16147 Genoa, Italy.\n","\n","Citation:\n","\n","Lichman, M. (2013). UCI Machine Learning Repository\n","[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,\n","School of Information and Computer Science. \n","\n",".. topic:: References\n","\n"," (1) S. Aeberhard, D. Coomans and O. de Vel, \n"," Comparison of Classifiers in High Dimensional Settings, \n"," Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of \n"," Mathematics and Statistics, James Cook University of North Queensland. \n"," (Also submitted to Technometrics). \n","\n"," The data was used with many others for comparing various \n"," classifiers. The classes are separable, though only RDA \n"," has achieved 100% correct classification. \n"," (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) \n"," (All results using the leave-one-out technique) \n","\n"," (2) S. Aeberhard, D. Coomans and O. de Vel, \n"," \"THE CLASSIFICATION PERFORMANCE OF RDA\" \n"," Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of \n"," Mathematics and Statistics, James Cook University of North Queensland. 
\n"," (Also submitted to Journal of Chemometrics).\n","\n"]}],"source":["print(data[\"DESCR\"])"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1T2sgtWDvpvx","outputId":"62213bec-7b91-4863-afe3-7a5c856dc3c5"},"outputs":[{"data":{"text/plain":["(3,)"]},"execution_count":55,"metadata":{},"output_type":"execute_result"}],"source":["#Сколько классов содержит целевая переменная датасета? \n","np.unique(data[\"target\"]).shape"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"dG980xw9vpvy","outputId":"dfe35300-d2bc-4e49-ac5e-cbdc7f5f9139"},"outputs":[{"data":{"text/plain":["array(['class_0', 'class_1', 'class_2'], dtype='\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0
\n",""],"text/plain":[" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n","0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n","1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n","2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n","3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n","4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n","\n"," B LSTAT \n","0 3.92 1065.0 \n","1 3.40 1050.0 \n","2 3.17 1185.0 \n","3 3.45 1480.0 \n","4 2.93 735.0 "]},"execution_count":57,"metadata":{},"output_type":"execute_result"}],"source":["#На основе данных датасета (они содержатся в двумерном массиве Numpy) \n","#и названий признаков создайте датафрейм под названием X.\n","X = pd.DataFrame(data.data, columns=feature_names)\n","X.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"nxfO8Ye4vpv2","outputId":"5c339ce1-ee73-406a-995a-5533b35664b2"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 178 entries, 0 to 177\n","Data columns (total 13 columns):\n","CRIM 178 non-null float64\n","ZN 178 non-null float64\n","INDUS 178 non-null float64\n","CHAS 178 non-null float64\n","NOX 178 non-null float64\n","RM 178 non-null float64\n","AGE 178 non-null float64\n","DIS 178 non-null float64\n","RAD 178 non-null float64\n","TAX 178 non-null float64\n","PTRATIO 178 non-null float64\n","B 178 non-null float64\n","LSTAT 178 non-null float64\n","dtypes: float64(13)\n","memory usage: 18.2 KB\n"]}],"source":["#Выясните размер датафрейма X и установите, имеются ли в нем пропущенные значения.\n","X.info()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jQpYBv84vpv3","outputId":"11777d55-5920-4c5d-b941-428ca2421b23"},"outputs":[{"data":{"text/plain":["(178, 
13)"]},"execution_count":59,"metadata":{},"output_type":"execute_result"}],"source":["X.shape"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"_CBcXjLQvpv5","outputId":"d265d9ab-340e-437e-9c8e-b40fa6996201"},"outputs":[{"data":{"text/plain":["CRIM 0\n","ZN 0\n","INDUS 0\n","CHAS 0\n","NOX 0\n","RM 0\n","AGE 0\n","DIS 0\n","RAD 0\n","TAX 0\n","PTRATIO 0\n","B 0\n","LSTAT 0\n","dtype: int64"]},"execution_count":60,"metadata":{},"output_type":"execute_result"}],"source":["X.isnull().astype(\"int\").sum()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"7BfndPdnvpv6","outputId":"c8975ee0-dfb2-470f-974d-cd18a80b6926"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 178 entries, 0 to 177\n","Data columns (total 14 columns):\n","CRIM 178 non-null float64\n","ZN 178 non-null float64\n","INDUS 178 non-null float64\n","CHAS 178 non-null float64\n","NOX 178 non-null float64\n","RM 178 non-null float64\n","AGE 178 non-null float64\n","DIS 178 non-null float64\n","RAD 178 non-null float64\n","TAX 178 non-null float64\n","PTRATIO 178 non-null float64\n","B 178 non-null float64\n","LSTAT 178 non-null float64\n","target 178 non-null int64\n","dtypes: float64(13), int64(1)\n","memory usage: 19.5 KB\n"]}],"source":["#Добавьте в датафрейм поле с классами вин в виде чисел, имеющих тип данных numpy.int64. Название поля - 'target'.\n","X[\"target\"]=data[\"target\"].astype(np.int64)\n","X.info()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zvaWeMYrvpv8","outputId":"e32d2f61-8c6d-45b4-f0bc-76da468d1e65"},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATtarget
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
\n","
"],"text/plain":[" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n","0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n","1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n","2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n","3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n","4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n","\n"," B LSTAT target \n","0 3.92 1065.0 0 \n","1 3.40 1050.0 0 \n","2 3.17 1185.0 0 \n","3 3.45 1480.0 0 \n","4 2.93 735.0 0 "]},"execution_count":62,"metadata":{},"output_type":"execute_result"}],"source":["X.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Ik_Jy89Qvpv9","outputId":"c89a2f16-a147-40b6-b5f7-c4e3b0ceeab7"},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATtarget
CRIM1.0000000.0943970.211545-0.3102350.2707980.2891010.236815-0.1559290.1366980.546364-0.0717470.0723430.643720-0.328222
ZN0.0943971.0000000.1640450.288500-0.054575-0.335167-0.4110070.292977-0.2207460.248985-0.561296-0.368710-0.1920110.437776
INDUS0.2115450.1640451.0000000.4433670.2865870.1289800.1150770.1862300.0096520.258887-0.0746670.0039110.223626-0.049643
CHAS-0.3102350.2885000.4433671.000000-0.083333-0.321113-0.3513700.361922-0.1973270.018732-0.273955-0.276769-0.4405970.517859
NOX0.270798-0.0545750.286587-0.0833331.0000000.2144010.195784-0.2562940.2364410.1999500.0553980.0660040.393351-0.209179
RM0.289101-0.3351670.128980-0.3211130.2144011.0000000.864564-0.4499350.612413-0.0551360.4336810.6999490.498115-0.719163
AGE0.236815-0.4110070.115077-0.3513700.1957840.8645641.000000-0.5379000.652692-0.1723790.5434790.7871940.494193-0.847498
DIS-0.1559290.2929770.1862300.361922-0.256294-0.449935-0.5379001.000000-0.3658450.139057-0.262640-0.503270-0.3113850.489109
RAD0.136698-0.2207460.009652-0.1973270.2364410.6124130.652692-0.3658451.000000-0.0252500.2955440.5190670.330417-0.499130
TAX0.5463640.2489850.2588870.0187320.199950-0.055136-0.1723790.139057-0.0252501.000000-0.521813-0.4288150.3161000.265668
PTRATIO-0.071747-0.561296-0.074667-0.2739550.0553980.4336810.543479-0.2626400.295544-0.5218131.0000000.5654680.236183-0.617369
B0.072343-0.3687100.003911-0.2767690.0660040.6999490.787194-0.5032700.519067-0.4288150.5654681.0000000.312761-0.788230
LSTAT0.643720-0.1920110.223626-0.4405970.3933510.4981150.494193-0.3113850.3304170.3161000.2361830.3127611.000000-0.633717
target-0.3282220.437776-0.0496430.517859-0.209179-0.719163-0.8474980.489109-0.4991300.265668-0.617369-0.788230-0.6337171.000000
\n","
"],"text/plain":[" CRIM ZN INDUS CHAS NOX RM AGE \\\n","CRIM 1.000000 0.094397 0.211545 -0.310235 0.270798 0.289101 0.236815 \n","ZN 0.094397 1.000000 0.164045 0.288500 -0.054575 -0.335167 -0.411007 \n","INDUS 0.211545 0.164045 1.000000 0.443367 0.286587 0.128980 0.115077 \n","CHAS -0.310235 0.288500 0.443367 1.000000 -0.083333 -0.321113 -0.351370 \n","NOX 0.270798 -0.054575 0.286587 -0.083333 1.000000 0.214401 0.195784 \n","RM 0.289101 -0.335167 0.128980 -0.321113 0.214401 1.000000 0.864564 \n","AGE 0.236815 -0.411007 0.115077 -0.351370 0.195784 0.864564 1.000000 \n","DIS -0.155929 0.292977 0.186230 0.361922 -0.256294 -0.449935 -0.537900 \n","RAD 0.136698 -0.220746 0.009652 -0.197327 0.236441 0.612413 0.652692 \n","TAX 0.546364 0.248985 0.258887 0.018732 0.199950 -0.055136 -0.172379 \n","PTRATIO -0.071747 -0.561296 -0.074667 -0.273955 0.055398 0.433681 0.543479 \n","B 0.072343 -0.368710 0.003911 -0.276769 0.066004 0.699949 0.787194 \n","LSTAT 0.643720 -0.192011 0.223626 -0.440597 0.393351 0.498115 0.494193 \n","target -0.328222 0.437776 -0.049643 0.517859 -0.209179 -0.719163 -0.847498 \n","\n"," DIS RAD TAX PTRATIO B LSTAT target \n","CRIM -0.155929 0.136698 0.546364 -0.071747 0.072343 0.643720 -0.328222 \n","ZN 0.292977 -0.220746 0.248985 -0.561296 -0.368710 -0.192011 0.437776 \n","INDUS 0.186230 0.009652 0.258887 -0.074667 0.003911 0.223626 -0.049643 \n","CHAS 0.361922 -0.197327 0.018732 -0.273955 -0.276769 -0.440597 0.517859 \n","NOX -0.256294 0.236441 0.199950 0.055398 0.066004 0.393351 -0.209179 \n","RM -0.449935 0.612413 -0.055136 0.433681 0.699949 0.498115 -0.719163 \n","AGE -0.537900 0.652692 -0.172379 0.543479 0.787194 0.494193 -0.847498 \n","DIS 1.000000 -0.365845 0.139057 -0.262640 -0.503270 -0.311385 0.489109 \n","RAD -0.365845 1.000000 -0.025250 0.295544 0.519067 0.330417 -0.499130 \n","TAX 0.139057 -0.025250 1.000000 -0.521813 -0.428815 0.316100 0.265668 \n","PTRATIO -0.262640 0.295544 -0.521813 1.000000 0.565468 0.236183 -0.617369 \n","B -0.503270 
0.519067 -0.428815 0.565468 1.000000 0.312761 -0.788230 \n","LSTAT -0.311385 0.330417 0.316100 0.236183 0.312761 1.000000 -0.633717 \n","target 0.489109 -0.499130 0.265668 -0.617369 -0.788230 -0.633717 1.000000 "]},"execution_count":63,"metadata":{},"output_type":"execute_result"}],"source":["#Постройте матрицу корреляций для всех полей X. Дайте полученному датафрейму название X_corr.\n","X_corr=X.corr()\n","X_corr"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ufH2XtFcvpv-","outputId":"c2d64419-a780-484b-9085-f2bc0865e8f4"},"outputs":[{"data":{"text/plain":["['CHAS', 'RM', 'AGE', 'PTRATIO', 'B', 'LSTAT']"]},"execution_count":64,"metadata":{},"output_type":"execute_result"}],"source":["#Создайте список high_corr из признаков, корреляция которых с полем target по абсолютному \n","#значению превышает 0.5 (причем, само поле target не должно входить в этот список).\n","high_corr=X_corr[\"target\"]\n","high_corr=high_corr[np.abs(high_corr)>0.5].drop(\"target\", axis=0)\n","high_corr=list(high_corr.index)\n","high_corr"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"VC3e1oB3vpwA","outputId":"fc199182-3bc2-4172-f1a3-041e00999647"},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0
\n","
"],"text/plain":[" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n","0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n","1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n","2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n","3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n","4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n","\n"," B LSTAT \n","0 3.92 1065.0 \n","1 3.40 1050.0 \n","2 3.17 1185.0 \n","3 3.45 1480.0 \n","4 2.93 735.0 "]},"execution_count":65,"metadata":{},"output_type":"execute_result"}],"source":["#Удалите из датафрейма X поле с целевой переменной. \n","X=X.drop(\"target\", axis=1)\n","X.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"AvKFv4FzvpwB","outputId":"c004b4e6-d4f1-46f3-f358-54f9a166e6fd"},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATCHAS_2RM_2AGE_2PTRATIO_2B_2LSTAT_2
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0243.367.84009.36361.081615.36641134225.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0125.447.02257.61761.102511.56001102500.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0345.967.840010.49761.060910.04891404225.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0282.2414.822512.18010.739611.90252190400.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0441.007.84007.23611.08168.5849540225.0
\n","
"],"text/plain":[" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n","0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n","1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n","2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n","3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n","4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n","\n"," B LSTAT CHAS_2 RM_2 AGE_2 PTRATIO_2 B_2 LSTAT_2 \n","0 3.92 1065.0 243.36 7.8400 9.3636 1.0816 15.3664 1134225.0 \n","1 3.40 1050.0 125.44 7.0225 7.6176 1.1025 11.5600 1102500.0 \n","2 3.17 1185.0 345.96 7.8400 10.4976 1.0609 10.0489 1404225.0 \n","3 3.45 1480.0 282.24 14.8225 12.1801 0.7396 11.9025 2190400.0 \n","4 2.93 735.0 441.00 7.8400 7.2361 1.0816 8.5849 540225.0 "]},"execution_count":66,"metadata":{},"output_type":"execute_result"}],"source":["#Для всех признаков, названия которых содержатся в списке high_corr, вычислите квадрат их \n","#значений и добавьте в датафрейм X соответствующие поля с суффиксом '_2', добавленного к \n","#первоначальному названию признака. Итоговый датафрейм должен содержать все поля, которые, \n","#были в нем изначально, а также поля с признаками из списка high_corr, возведенными в квадрат. \n","for i in high_corr:\n"," X[i+\"_2\"]=X[i]**2\n","X.head()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"4DVqJzT8vpwC","outputId":"991a6101-6477-46fa-e5bf-be5ea4706229"},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATCHAS_2RM_2AGE_2PTRATIO_2B_2LSTAT_2
count178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.0000001.780000e+02
mean13.0006182.3363482.36651719.49494499.7415732.2951122.0292700.3618541.5908995.0580900.9574492.611685746.893258391.1428655.6570305.1100490.9686617.3221556.564591e+05
std0.8118271.1171460.2743443.33956414.2824840.6258510.9988590.1244530.5723592.3182860.2285720.709990314.907474133.6717752.9362944.2114410.4437983.5843165.558591e+05
min11.0300000.7400001.36000010.60000070.0000000.9800000.3400000.1300000.4100001.2800000.4800001.270000278.000000112.3600000.9604000.1156000.2304001.6129007.728400e+04
25%12.3625001.6025002.21000017.20000088.0000001.7425001.2050000.2700001.2500003.2200000.7825001.937500500.500000295.8400003.0363251.4521000.6123253.7540752.505010e+05
50%13.0500001.8650002.36000019.50000098.0000002.3550002.1350000.3400001.5550004.6900000.9650002.780000673.500000380.2500005.5460504.5582500.9312507.7284004.536045e+05
75%13.6775003.0825002.55750021.500000107.0000002.8000002.8750000.4375001.9500006.2000001.1200003.170000985.000000462.2500007.8400008.2657001.25440010.0489009.702250e+05
max14.8300005.8000003.23000030.000000162.0000003.8800005.0800000.6600003.58000013.0000001.7100004.0000001680.000000900.00000015.05440025.8064002.92410016.0000002.822400e+06
\n","
"],"text/plain":[" CRIM ZN INDUS CHAS NOX RM \\\n","count 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 \n","mean 13.000618 2.336348 2.366517 19.494944 99.741573 2.295112 \n","std 0.811827 1.117146 0.274344 3.339564 14.282484 0.625851 \n","min 11.030000 0.740000 1.360000 10.600000 70.000000 0.980000 \n","25% 12.362500 1.602500 2.210000 17.200000 88.000000 1.742500 \n","50% 13.050000 1.865000 2.360000 19.500000 98.000000 2.355000 \n","75% 13.677500 3.082500 2.557500 21.500000 107.000000 2.800000 \n","max 14.830000 5.800000 3.230000 30.000000 162.000000 3.880000 \n","\n"," AGE DIS RAD TAX PTRATIO B \\\n","count 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 \n","mean 2.029270 0.361854 1.590899 5.058090 0.957449 2.611685 \n","std 0.998859 0.124453 0.572359 2.318286 0.228572 0.709990 \n","min 0.340000 0.130000 0.410000 1.280000 0.480000 1.270000 \n","25% 1.205000 0.270000 1.250000 3.220000 0.782500 1.937500 \n","50% 2.135000 0.340000 1.555000 4.690000 0.965000 2.780000 \n","75% 2.875000 0.437500 1.950000 6.200000 1.120000 3.170000 \n","max 5.080000 0.660000 3.580000 13.000000 1.710000 4.000000 \n","\n"," LSTAT CHAS_2 RM_2 AGE_2 PTRATIO_2 \\\n","count 178.000000 178.000000 178.000000 178.000000 178.000000 \n","mean 746.893258 391.142865 5.657030 5.110049 0.968661 \n","std 314.907474 133.671775 2.936294 4.211441 0.443798 \n","min 278.000000 112.360000 0.960400 0.115600 0.230400 \n","25% 500.500000 295.840000 3.036325 1.452100 0.612325 \n","50% 673.500000 380.250000 5.546050 4.558250 0.931250 \n","75% 985.000000 462.250000 7.840000 8.265700 1.254400 \n","max 1680.000000 900.000000 15.054400 25.806400 2.924100 \n","\n"," B_2 LSTAT_2 \n","count 178.000000 1.780000e+02 \n","mean 7.322155 6.564591e+05 \n","std 3.584316 5.558591e+05 \n","min 1.612900 7.728400e+04 \n","25% 3.754075 2.505010e+05 \n","50% 7.728400 4.536045e+05 \n","75% 10.048900 9.702250e+05 \n","max 16.000000 2.822400e+06 
"]},"execution_count":67,"metadata":{},"output_type":"execute_result"}],"source":["#Выведите описание полей датафрейма X с помощью метода describe.\n","X.describe()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"XVom2-ZWvpwD"},"outputs":[],"source":[""]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.3"},"colab":{"name":"dz_analitic3.ipynb\"","provenance":[{"file_id":"1zczSw0aBbnW4AOv5ODmOWuQ72F8HZjxr","timestamp":1645102884965}]}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tTr7YqcVvptR" + }, + "outputs": [], + "source": [ + "#1. Импортируйте библиотеки pandas и numpy.\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 644, + "status": "ok", + "timestamp": 1645102447075, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "uvJgmxd9vptr", + "outputId": "d631acd5-84d5-454c-b731-066948675442" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.\n", + "\n", + " The Boston housing prices dataset has an ethical problem. 
You can refer to\n", + " the documentation of this function for further details.\n", + "\n", + " The scikit-learn maintainers therefore strongly discourage the use of this\n", + " dataset unless the purpose of the code is to study and educate about\n", + " ethical issues in data science and machine learning.\n", + "\n", + " In this special case, you can fetch the dataset from the original\n", + " source::\n", + "\n", + " import pandas as pd\n", + " import numpy as np\n", + "\n", + "\n", + " data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n", + " raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n", + " data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n", + " target = raw_df.values[1::2, 2]\n", + "\n", + " Alternative datasets include the California housing dataset (i.e.\n", + " :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing\n", + " dataset. You can load the datasets as follows::\n", + "\n", + " from sklearn.datasets import fetch_california_housing\n", + " housing = fetch_california_housing()\n", + "\n", + " for the California housing dataset and::\n", + "\n", + " from sklearn.datasets import fetch_openml\n", + " housing = fetch_openml(name=\"house_prices\", as_frame=True)\n", + "\n", + " for the Ames housing dataset.\n", + " \n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename', 'data_module'])" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Загрузите \"Boston House Prices dataset\" из встроенных наборов данных библиотеки sklearn. 
\n", + "from sklearn.datasets import load_boston\n", + "boston = load_boston()\n", + "boston.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 680, + "status": "ok", + "timestamp": 1645102482478, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "biPusKfIvpt2", + "outputId": "6ef83c33-828a-4c8b-a379-613d828516bc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(506, 13)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Создайте датафреймы X и y из этих данных.\n", + "data = boston[\"data\"]\n", + "data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 582, + "status": "ok", + "timestamp": 1645102496787, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "BqL6OHzovpt7", + "outputId": "5a3f82f5-1421-435a-de64-6c14aa6b036a" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',\n", + " 'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.98
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.14
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.03
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.94
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.33
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX ... RAD TAX PTRATIO B LSTAT\n", + "0 0.00632 18.0 2.31 0.0 0.538 ... 1.0 296.0 15.3 396.90 4.98\n", + "1 0.02731 0.0 7.07 0.0 0.469 ... 2.0 242.0 17.8 396.90 9.14\n", + "2 0.02729 0.0 7.07 0.0 0.469 ... 2.0 242.0 17.8 392.83 4.03\n", + "3 0.03237 0.0 2.18 0.0 0.458 ... 3.0 222.0 18.7 394.63 2.94\n", + "4 0.06905 0.0 2.18 0.0 0.458 ... 3.0 222.0 18.7 396.90 5.33\n", + "\n", + "[5 rows x 13 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = pd.DataFrame(data, columns=feature_names)\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 494, + "status": "ok", + "timestamp": 1645102602224, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "44nL6Sa9vpuH", + "outputId": "755e9f26-8f7c-4a23-bfd5-7d487ba5dd87" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 506 entries, 0 to 505\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 CRIM 506 non-null float64\n", + " 1 ZN 506 non-null float64\n", + " 2 INDUS 506 non-null float64\n", + " 3 CHAS 506 non-null float64\n", + " 4 NOX 506 non-null float64\n", + " 5 RM 506 non-null float64\n", + " 6 AGE 506 non-null float64\n", + " 7 DIS 506 non-null float64\n", + " 8 RAD 506 non-null float64\n", + " 9 TAX 506 non-null float64\n", + " 10 PTRATIO 506 non-null float64\n", + " 11 B 506 non-null float64\n", + " 12 LSTAT 506 non-null float64\n", + "dtypes: float64(13)\n", + "memory usage: 51.5 KB\n" + ] + } + ], + "source": [ + "X.info()" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 857, + "status": "ok", + "timestamp": 1645102631315, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "QX6NtkRvvpuL", + "outputId": "e3de32df-8986-4d87-ddfe-4dd178f79bca" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 506 entries, 0 to 505\n", + "Data columns (total 1 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 price 506 non-null float64\n", + "dtypes: float64(1)\n", + "memory usage: 4.1 KB\n" + ] + } + ], + "source": [ + "y = pd.DataFrame(target, columns=[\"price\"])\n", + "y.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J_pVV9ynvpuQ" + }, + "outputs": [], + "source": [ + "#Разбейте эти датафреймы на тренировочные (X_train, y_train) и тестовые (X_test, y_test) \n", + "#с помощью функции train_test_split так, чтобы размер тестовой выборки составлял 30% от \n", + "#всех данных, при этом аргумент random_state должен быть равен 42. \n", + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)  # fixed seed: reproducible 70/30 split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xK_P8XD7vpuX" + }, + "outputs": [], + "source": [ + "#Создайте модель линейной регрессии под названием lr с помощью класса \n", + "#LinearRegression из модуля sklearn.linear_model. 
\n", + "from sklearn.linear_model import LinearRegression\n", + "lr = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 581, + "status": "ok", + "timestamp": 1645102675535, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "KZrt-xrwvpub", + "outputId": "8e18ae4d-9407-496a-8a61-08b62d90fb67" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Обучите модель на тренировочных данных (используйте все признаки) и сделайте предсказание на тестовых.\n", + "lr.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 518, + "status": "ok", + "timestamp": 1645102686553, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "wuW0wldkvpue", + "outputId": "96769c07-009e-4da9-c891-b231195193fd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(152, 1)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = lr.predict(X_test)\n", + "y_pred.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "executionInfo": { + "elapsed": 623, + "status": "ok", + "timestamp": 1645102693702, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": 
"https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "gIDIpPrevpui", + "outputId": "2d7a1230-e409-4e1f-f87d-9420a8d5e873" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
y_testy_pred
9222.928.998112
36025.023.472965
3877.46.862435
4897.06.479228
30728.233.423096
28822.327.306120
45216.118.793646
20348.541.576196
23625.129.342199
4005.612.654184
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " y_test y_pred\n", + "92 22.9 28.998112\n", + "360 25.0 23.472965\n", + "387 7.4 6.862435\n", + "489 7.0 6.479228\n", + "307 28.2 33.423096\n", + "288 22.3 27.306120\n", + "452 16.1 18.793646\n", + "203 48.5 41.576196\n", + "236 25.1 29.342199\n", + "400 5.6 12.654184" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check_test = pd.DataFrame({\n", + " \"y_test\": y_test[\"price\"],\n", + " \"y_pred\": y_pred.flatten(),\n", + "})\n", + "\n", + "check_test.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "executionInfo": { + "elapsed": 637, + "status": "ok", + "timestamp": 1645102705122, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "DRKc79e4vpuk", + "outputId": "f92678e2-9fbc-4dea-8f27-3d99c7649566" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
y_testy_prederror
9222.928.9981126.098112
36025.023.472965-1.527035
3877.46.862435-0.537565
4897.06.479228-0.520772
30728.233.4230965.223096
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " y_test y_pred error\n", + "92 22.9 28.998112 6.098112\n", + "360 25.0 23.472965 -1.527035\n", + "387 7.4 6.862435 -0.537565\n", + "489 7.0 6.479228 -0.520772\n", + "307 28.2 33.423096 5.223096" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Вычислите R2 полученных предсказаний с помощью r2_score из модуля sklearn.metrics.\n", + "check_test[\"error\"] = check_test[\"y_pred\"] - check_test[\"y_test\"]\n", + "check_test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 586, + "status": "ok", + "timestamp": 1645102725363, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "gXSMLW8Qvpun", + "outputId": "f9e307f1-b8ca-47d4-a259-8aac6acc3944" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6436988807960613" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import r2_score\n", + "r2_score_1=r2_score(check_test[\"y_test\"], check_test[\"y_pred\"])  # r2_score(y_true, y_pred): ground truth first, R2 is not symmetric\n", + "r2_score_1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0DzYfsSGvpup" + }, + "outputs": [], + "source": [ + "#2. Создайте модель под названием model с помощью RandomForestRegressor из модуля sklearn.ensemble.\n", + "#Сделайте аргумент n_estimators равным 1000, max_depth должен быть равен 12 и random_state сделайте равным 42. 
\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "model = RandomForestRegressor(n_estimators=1000, max_depth=12, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 2797, + "status": "ok", + "timestamp": 1645102745210, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "Dk2V8hhwvpus", + "outputId": "0246f5c8-5d5f-4ee1-9e20-5a6e25f1363e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestRegressor(max_depth=12, n_estimators=1000, random_state=42)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Обучите модель на тренировочных данных аналогично тому, как вы обучали модель LinearRegression, \n", + "#но при этом в метод fit вместо датафрейма y_train поставьте y_train.values[:, 0], чтобы получить \n", + "#из датафрейма одномерный массив Numpy, так как для класса RandomForestRegressor в данном методе \n", + "#для аргумента y предпочтительно применение массивов вместо датафрейма.\n", + "model.fit(X_train, y_train.values[:, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iUQM3rgdvpuw", + "outputId": "7927a90b-3b43-4694-a7b8-787e0e4e9720" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(152,)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Сделайте предсказание на тестовых данных и посчитайте R2.\n", + "y_pred = model.predict(X_test)\n", + "y_pred.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "executionInfo": { + "elapsed": 630, + "status": 
"ok", + "timestamp": 1645102751782, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "_fspTr2qvpuz", + "outputId": "ee59cb85-4482-4df6-c7d0-bf4602900682" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
y_testy_pred
9222.928.998112
36025.023.472965
3877.46.862435
4897.06.479228
30728.233.423096
28822.327.306120
45216.118.793646
20348.541.576196
23625.129.342199
4005.612.654184
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " y_test y_pred\n", + "92 22.9 28.998112\n", + "360 25.0 23.472965\n", + "387 7.4 6.862435\n", + "489 7.0 6.479228\n", + "307 28.2 33.423096\n", + "288 22.3 27.306120\n", + "452 16.1 18.793646\n", + "203 48.5 41.576196\n", + "236 25.1 29.342199\n", + "400 5.6 12.654184" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "check_test = pd.DataFrame({\n", + " \"y_test\": y_test[\"price\"],\n", + " \"y_pred\": y_pred.flatten(),\n", + "})\n", + "\n", + "check_test.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 940, + "status": "ok", + "timestamp": 1645102763418, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "qtKz0tvsvpu2", + "outputId": "9a487e98-5aa0-40be-fab2-550493567ce3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6436988807960613" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r2_score_2=r2_score(check_test[\"y_test\"], check_test[\"y_pred\"])  # r2_score(y_true, y_pred): ground truth first, R2 is not symmetric\n", + "r2_score_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 17, + "status": "ok", + "timestamp": 1645102766795, + "user": { + "displayName": "Сергей Шибанов", + "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gg4en2utGnNuLKMiqN6wyZh28ra38UycEqB-n63=s64", + "userId": "00971423091508291527" + }, + "user_tz": -300 + }, + "id": "r368oFU1vpu5", + "outputId": "515b6ba6-522a-4b1b-f7cc-2725783da9cb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 21, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "#Сравните с результатом из предыдущего задания.\n", + "#Напишите в комментариях к коду, какая модель в данном случае работает лучше.\n", + "r2_score_1\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.363787...-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425...-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.514654...0.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024...-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.817739...-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
\n", + "

5 rows × 31 columns

\n", + "" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 V7 \\\n", + "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", + "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", + "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", + "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", + "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", + "\n", + " V8 V9 ... V21 V22 V23 V24 V25 \\\n", + "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n", + "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n", + "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n", + "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n", + "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n", + "\n", + " V26 V27 V28 Amount Class \n", + "0 -0.189115 0.133558 -0.021053 149.62 0 \n", + "1 0.125895 -0.008983 0.014724 2.69 0 \n", + "2 -0.139097 -0.055353 -0.059752 378.66 0 \n", + "3 -0.221929 0.062723 0.061458 123.50 0 \n", + "4 0.502292 0.219422 0.215153 69.99 0 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Загрузите датасет creditcard.csv и создайте датафрейм df.\n", + "df = pd.read_csv('creditcard.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZKI6KQ9PvpvJ", + "outputId": "ddb8fa72-c690-4769-eb8b-93a503c012a8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.998273\n", + "1 0.001727\n", + "Name: Class, dtype: float64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#С помощью метода value_counts с аргументом normalize=True убедитесь в том, что выборка несбалансирована.\n", + 
"df[\"Class\"].value_counts(normalize=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-nD44QtOvpvL", + "outputId": "08424e0c-28df-4d34-9421-9c0b94390f08" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 284807 entries, 0 to 284806\n", + "Data columns (total 31 columns):\n", + "Time 284807 non-null float64\n", + "V1 284807 non-null float64\n", + "V2 284807 non-null float64\n", + "V3 284807 non-null float64\n", + "V4 284807 non-null float64\n", + "V5 284807 non-null float64\n", + "V6 284807 non-null float64\n", + "V7 284807 non-null float64\n", + "V8 284807 non-null float64\n", + "V9 284807 non-null float64\n", + "V10 284807 non-null float64\n", + "V11 284807 non-null float64\n", + "V12 284807 non-null float64\n", + "V13 284807 non-null float64\n", + "V14 284807 non-null float64\n", + "V15 284807 non-null float64\n", + "V16 284807 non-null float64\n", + "V17 284807 non-null float64\n", + "V18 284807 non-null float64\n", + "V19 284807 non-null float64\n", + "V20 284807 non-null float64\n", + "V21 284807 non-null float64\n", + "V22 284807 non-null float64\n", + "V23 284807 non-null float64\n", + "V24 284807 non-null float64\n", + "V25 284807 non-null float64\n", + "V26 284807 non-null float64\n", + "V27 284807 non-null float64\n", + "V28 284807 non-null float64\n", + "Amount 284807 non-null float64\n", + "Class 284807 non-null int64\n", + "dtypes: float64(30), int64(1)\n", + "memory usage: 67.4 MB\n" + ] + } + ], + "source": [ + "#Используя метод info, проверьте, все ли столбцы содержат числовые данные\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K4BQ67jjvpvM", + "outputId": "a0000c45-4a9e-41da-bd7f-3e263779142e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Time 0\n", + "V1 0\n", + "V2 0\n", + "V3 0\n", + "V4 0\n", + "V5 0\n", + "V6 0\n", + "V7 0\n", + "V8 0\n", + "V9 0\n", + "V10 0\n", + "V11 
0\n", + "V12 0\n", + "V13 0\n", + "V14 0\n", + "V15 0\n", + "V16 0\n", + "V17 0\n", + "V18 0\n", + "V19 0\n", + "V20 0\n", + "V21 0\n", + "V22 0\n", + "V23 0\n", + "V24 0\n", + "V25 0\n", + "V26 0\n", + "V27 0\n", + "V28 0\n", + "Amount 0\n", + "Class 0\n", + "dtype: int32" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#и нет ли в них пропусков\n", + "df.isnull().astype(np.int).sum().astype(np.int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BqwtVoU8vpvN" + }, + "outputs": [], + "source": [ + "#Примените следующую настройку, чтобы можно было просматривать все столбцы датафрейма: \n", + "#pd.options.display.max_columns = 100.\n", + "pd.options.display.max_columns = 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6IKKcAiSvpvO", + "outputId": "6c887466-9f4d-4e1f-83f2-f41722364aea" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.3637870.090794-0.551600-0.617801-0.991390-0.3111691.468177-0.4704010.2079710.0257910.4039930.251412-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425-0.1669741.6127271.0652350.489095-0.1437720.6355580.463917-0.114805-0.183361-0.145783-0.069083-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.5146540.2076430.6245010.0660840.717293-0.1659462.345865-2.8900831.109969-0.121359-2.2618570.5249800.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024-0.054952-0.2264870.1782280.507757-0.287924-0.631418-1.059647-0.6840931.965775-1.232622-0.208038-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.8177390.753074-0.8228430.5381961.345852-1.1196700.175121-0.451449-0.237033-0.0381950.8034870.408542-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
52.0-0.4259660.9605231.141109-0.1682520.420987-0.0297280.4762010.260314-0.568671-0.3714071.3412620.359894-0.358091-0.1371340.5176170.401726-0.0581330.068653-0.0331940.084968-0.208254-0.559825-0.026398-0.371427-0.2327940.1059150.2538440.0810803.670
64.01.2296580.1410040.0453711.2026130.1918810.272708-0.0051590.0812130.464960-0.099254-1.416907-0.153826-0.7510630.1673720.050144-0.4435870.002821-0.611987-0.045575-0.219633-0.167716-0.270710-0.154104-0.7800550.750137-0.2572370.0345070.0051684.990
77.0-0.6442691.4179641.074380-0.4921990.9489340.4281181.120631-3.8078640.6153751.249376-0.6194680.2914741.757964-1.3238650.686133-0.076127-1.222127-0.3582220.324505-0.1567421.943465-1.0154550.057504-0.649709-0.415267-0.051634-1.206921-1.08533940.800
87.0-0.8942860.286157-0.113192-0.2715262.6695993.7218180.3701450.851084-0.392048-0.410430-0.705117-0.110452-0.2862540.074355-0.328783-0.210077-0.4997680.1187650.5703280.052736-0.073425-0.268092-0.2042331.0115920.373205-0.3841570.0117470.14240493.200
99.0-0.3382621.1195931.044367-0.2221870.499361-0.2467610.6515830.069539-0.736727-0.3668461.0176140.8363901.006844-0.4435230.1502190.739453-0.5409800.4766770.4517730.203711-0.246914-0.633753-0.120794-0.385050-0.0697330.0941990.2462190.0830763.680
\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 V7 \\\n", + "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", + "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", + "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", + "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", + "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", + "5 2.0 -0.425966 0.960523 1.141109 -0.168252 0.420987 -0.029728 0.476201 \n", + "6 4.0 1.229658 0.141004 0.045371 1.202613 0.191881 0.272708 -0.005159 \n", + "7 7.0 -0.644269 1.417964 1.074380 -0.492199 0.948934 0.428118 1.120631 \n", + "8 7.0 -0.894286 0.286157 -0.113192 -0.271526 2.669599 3.721818 0.370145 \n", + "9 9.0 -0.338262 1.119593 1.044367 -0.222187 0.499361 -0.246761 0.651583 \n", + "\n", + " V8 V9 V10 V11 V12 V13 V14 \\\n", + "0 0.098698 0.363787 0.090794 -0.551600 -0.617801 -0.991390 -0.311169 \n", + "1 0.085102 -0.255425 -0.166974 1.612727 1.065235 0.489095 -0.143772 \n", + "2 0.247676 -1.514654 0.207643 0.624501 0.066084 0.717293 -0.165946 \n", + "3 0.377436 -1.387024 -0.054952 -0.226487 0.178228 0.507757 -0.287924 \n", + "4 -0.270533 0.817739 0.753074 -0.822843 0.538196 1.345852 -1.119670 \n", + "5 0.260314 -0.568671 -0.371407 1.341262 0.359894 -0.358091 -0.137134 \n", + "6 0.081213 0.464960 -0.099254 -1.416907 -0.153826 -0.751063 0.167372 \n", + "7 -3.807864 0.615375 1.249376 -0.619468 0.291474 1.757964 -1.323865 \n", + "8 0.851084 -0.392048 -0.410430 -0.705117 -0.110452 -0.286254 0.074355 \n", + "9 0.069539 -0.736727 -0.366846 1.017614 0.836390 1.006844 -0.443523 \n", + "\n", + " V15 V16 V17 V18 V19 V20 V21 \\\n", + "0 1.468177 -0.470401 0.207971 0.025791 0.403993 0.251412 -0.018307 \n", + "1 0.635558 0.463917 -0.114805 -0.183361 -0.145783 -0.069083 -0.225775 \n", + "2 2.345865 -2.890083 1.109969 -0.121359 -2.261857 0.524980 0.247998 \n", + "3 -0.631418 -1.059647 -0.684093 1.965775 -1.232622 
-0.208038 -0.108300 \n", + "4 0.175121 -0.451449 -0.237033 -0.038195 0.803487 0.408542 -0.009431 \n", + "5 0.517617 0.401726 -0.058133 0.068653 -0.033194 0.084968 -0.208254 \n", + "6 0.050144 -0.443587 0.002821 -0.611987 -0.045575 -0.219633 -0.167716 \n", + "7 0.686133 -0.076127 -1.222127 -0.358222 0.324505 -0.156742 1.943465 \n", + "8 -0.328783 -0.210077 -0.499768 0.118765 0.570328 0.052736 -0.073425 \n", + "9 0.150219 0.739453 -0.540980 0.476677 0.451773 0.203711 -0.246914 \n", + "\n", + " V22 V23 V24 V25 V26 V27 V28 \\\n", + "0 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 \n", + "1 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 \n", + "2 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 \n", + "3 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 \n", + "4 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 \n", + "5 -0.559825 -0.026398 -0.371427 -0.232794 0.105915 0.253844 0.081080 \n", + "6 -0.270710 -0.154104 -0.780055 0.750137 -0.257237 0.034507 0.005168 \n", + "7 -1.015455 0.057504 -0.649709 -0.415267 -0.051634 -1.206921 -1.085339 \n", + "8 -0.268092 -0.204233 1.011592 0.373205 -0.384157 0.011747 0.142404 \n", + "9 -0.633753 -0.120794 -0.385050 -0.069733 0.094199 0.246219 0.083076 \n", + "\n", + " Amount Class \n", + "0 149.62 0 \n", + "1 2.69 0 \n", + "2 378.66 0 \n", + "3 123.50 0 \n", + "4 69.99 0 \n", + "5 3.67 0 \n", + "6 4.99 0 \n", + "7 40.80 0 \n", + "8 93.20 0 \n", + "9 3.68 0 " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Просмотрите первые 10 строк датафрейма df.\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gWS6n9-CvpvR", + "outputId": "f85ad98a-dede-415a-84c4-57fbde41ab6c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28Amount
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.3637870.090794-0.551600-0.617801-0.991390-0.3111691.468177-0.4704010.2079710.0257910.4039930.251412-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.62
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425-0.1669741.6127271.0652350.489095-0.1437720.6355580.463917-0.114805-0.183361-0.145783-0.069083-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.69
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.5146540.2076430.6245010.0660840.717293-0.1659462.345865-2.8900831.109969-0.121359-2.2618570.5249800.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.66
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024-0.054952-0.2264870.1782280.507757-0.287924-0.631418-1.059647-0.6840931.965775-1.232622-0.208038-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.50
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.8177390.753074-0.8228430.5381961.345852-1.1196700.175121-0.451449-0.237033-0.0381950.8034870.408542-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.99
\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 V7 \\\n", + "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", + "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", + "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", + "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", + "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", + "\n", + " V8 V9 V10 V11 V12 V13 V14 \\\n", + "0 0.098698 0.363787 0.090794 -0.551600 -0.617801 -0.991390 -0.311169 \n", + "1 0.085102 -0.255425 -0.166974 1.612727 1.065235 0.489095 -0.143772 \n", + "2 0.247676 -1.514654 0.207643 0.624501 0.066084 0.717293 -0.165946 \n", + "3 0.377436 -1.387024 -0.054952 -0.226487 0.178228 0.507757 -0.287924 \n", + "4 -0.270533 0.817739 0.753074 -0.822843 0.538196 1.345852 -1.119670 \n", + "\n", + " V15 V16 V17 V18 V19 V20 V21 \\\n", + "0 1.468177 -0.470401 0.207971 0.025791 0.403993 0.251412 -0.018307 \n", + "1 0.635558 0.463917 -0.114805 -0.183361 -0.145783 -0.069083 -0.225775 \n", + "2 2.345865 -2.890083 1.109969 -0.121359 -2.261857 0.524980 0.247998 \n", + "3 -0.631418 -1.059647 -0.684093 1.965775 -1.232622 -0.208038 -0.108300 \n", + "4 0.175121 -0.451449 -0.237033 -0.038195 0.803487 0.408542 -0.009431 \n", + "\n", + " V22 V23 V24 V25 V26 V27 V28 \\\n", + "0 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 \n", + "1 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 \n", + "2 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 \n", + "3 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 \n", + "4 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 \n", + "\n", + " Amount \n", + "0 149.62 \n", + "1 2.69 \n", + "2 378.66 \n", + "3 123.50 \n", + "4 69.99 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Создайте датафрейм X из датафрейма 
df, исключив столбец Class.\n", + "X = df.drop(\"Class\", axis=1)\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oeJlki8zvpvT", + "outputId": "cdea01bf-047f-4c04-82f2-d84661d0cce8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + "Name: Class, dtype: int64" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Создайте объект Series под названием y из столбца Class.\n", + "y = df[\"Class\"]\n", + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jUtviSb5vpvU" + }, + "outputs": [], + "source": [ + "#Разбейте X и y на тренировочный и тестовый наборы данных при помощи функции train_test_split, \n", + "#используя аргументы: test_size=0.3, random_state=100, stratify=y.\n", + "#У вас должны получиться объекты X_train, X_test, y_train и y_test.\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100, stratify=y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mKjD8fYnvpvV", + "outputId": "20f76527-3824-47ca-8b76-5af8ba856a95" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "((199364, 30), (85443, 30), (199364,), (85443,))" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Просмотрите информацию о их форме.\n", + "X_train.shape, X_test.shape, y_train.shape, y_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PeODdXarvpvW" + }, + "outputs": [], + "source": [ + "#Для поиска по сетке параметров задайте такие параметры:\n", + "#parameters = [{'n_estimators': [10, 15], \n", + "#'max_features': np.arange(3, 5),\n", + "#'max_depth': np.arange(4, 7)}]\n", + "parameters = [{'n_estimators': [10, 15], \n", + "'max_features': np.arange(3, 5),\n", + "'max_depth': 
np.arange(4, 7)}]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TOj3o830vpvX" + }, + "outputs": [], + "source": [ + "#Создайте модель GridSearchCV со следующими аргументами:\n", + "#estimator=RandomForestClassifier(random_state=100), \n", + "#param_grid=parameters,\n", + "#scoring='roc_auc',\n", + "#cv=3.\n", + "clf = GridSearchCV(\n", + " estimator=RandomForestClassifier(random_state=100),\n", + " param_grid=parameters,\n", + " scoring='roc_auc',\n", + " cv=3,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LL15ShxtvpvZ", + "outputId": "c4501d3d-a033-4be4-cf75-37509c37aa81" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "GridSearchCV(cv=3, error_score='raise-deprecating',\n", + " estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,\n", + " oob_score=False, random_state=100, verbose=0, warm_start=False),\n", + " fit_params=None, iid='warn', n_jobs=None,\n", + " param_grid=[{'n_estimators': [10, 15], 'max_features': array([3, 4]), 'max_depth': array([4, 5, 6])}],\n", + " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n", + " scoring='roc_auc', verbose=0)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Обучите модель на тренировочном наборе данных (может занять несколько минут).\n", + "clf.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5pB3n-okvpva", + "outputId": "214555a6-70b7-45a0-8e45-082284913456" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 6, 'max_features': 3, 'n_estimators': 15}" + ] + }, + "execution_count": 
43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Просмотрите параметры лучшей модели с помощью атрибута best_params_.\n", + "clf.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q2KYdOYmvpvb", + "outputId": "e405a7e4-0831-40fb-f836-a0fb349a0f53" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[9.99070828e-01 9.29171738e-04]\n", + " [9.99704794e-01 2.95206364e-04]\n", + " [9.99717846e-01 2.82154033e-04]\n", + " [9.99717846e-01 2.82154033e-04]\n", + " [9.99717846e-01 2.82154033e-04]\n", + " [9.99717846e-01 2.82154033e-04]\n", + " [9.99717846e-01 2.82154033e-04]\n", + " [9.99717846e-01 2.82154033e-04]\n", + " [9.99717846e-01 2.82154033e-04]\n", + " [9.99717846e-01 2.82154033e-04]]\n" + ] + } + ], + "source": [ + "#Предскажите вероятности классов с помощью полученнной модели и метода predict_proba.\n", + "y_pred_proba = clf.predict_proba(X_test)\n", + "print(y_pred_proba[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PdRiplGRvpvc", + "outputId": "4b853da7-a0f8-4689-bba8-e7dfa193d71c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.00092917 0.00029521 0.00028215 0.00028215 0.00028215]\n" + ] + } + ], + "source": [ + "#Из полученного результата (массив Numpy) выберите столбец с индексом 1 (вероятность класса 1) \n", + "#и запишите в массив y_pred_proba.\n", + "y_pred_proba = y_pred_proba[:, 1]\n", + "print(y_pred_proba[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mry7O1_Avpvd" + }, + "outputs": [], + "source": [ + "#Из модуля sklearn.metrics импортируйте метрику roc_auc_score.\n", + "from sklearn.metrics import roc_auc_score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yj5PuZXKvpve", + "outputId": "2c545ebf-3874-448b-a12a-f516e25e6ac5" + }, + "outputs": [ + { + "data": 
{ + "text/plain": [ + "0.9462664156037156" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Вычислите AUC на тестовых данных и сравните с результатом, \n", + "#полученным на тренировочных данных, используя в качестве аргументов\n", + "#массивы y_test и y_pred_proba.\n", + "roc_auc_score(y_test, y_pred_proba)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5jmj2gM_vpvg" + }, + "source": [ + "####Дополнительные задания:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W9__mHwVvpvn", + "outputId": "1e28e2a8-8544-4b4a-9f7f-fbba3d3d3c5d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Загрузите датасет Wine из встроенных датасетов sklearn.datasets с помощью функции load_wine в переменную data.\n", + "#Полученный датасет не является датафреймом. Это структура данных, имеющая ключи аналогично словарю. 
\n", + "from sklearn.datasets import load_wine\n", + "data = load_wine()\n", + "data.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uI-vDg-ivpvp", + "outputId": "2a0c391e-d6c3-489d-b574-58a36c821e15" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "sklearn.utils.Bunch" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Просмотрите тип данных этой структуры данных\n", + "type(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c83d-aNUvpvq", + "outputId": "5babde0e-fafe-46ac-d90d-3cfeff235dfb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['alcohol',\n", + " 'malic_acid',\n", + " 'ash',\n", + " 'alcalinity_of_ash',\n", + " 'magnesium',\n", + " 'total_phenols',\n", + " 'flavanoids',\n", + " 'nonflavanoid_phenols',\n", + " 'proanthocyanins',\n", + " 'color_intensity',\n", + " 'hue',\n", + " 'od280/od315_of_diluted_wines',\n", + " 'proline']" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#и создайте список data_keys, содержащий ее ключи.\n", + "data_keys=data[\"feature_names\"]\n", + "data_keys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XS0lGGuIvpvs", + "outputId": "24ce1d20-dd97-4588-c56a-16041e26eb83" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,\n", + " 1.065e+03],\n", + " [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,\n", + " 1.050e+03],\n", + " [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,\n", + " 1.185e+03],\n", + " ...,\n", + " [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,\n", + " 8.350e+02],\n", + " [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,\n", + " 8.400e+02],\n", + " [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,\n", + " 
5.600e+02]])" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Просмотрите данные\n", + "data.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E13dmX7kvpvt", + "outputId": "24239ff3-d99d-4dd0-e1a7-56d1175a8d13" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(178, 13)" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JOYCfMA1vpvu", + "outputId": "7724e237-c0e0-41aa-bf3c-4be4005a5b4a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. _wine_dataset:\n", + "\n", + "Wine recognition dataset\n", + "------------------------\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 178 (50 in each of three classes)\n", + " :Number of Attributes: 13 numeric, predictive attributes and the class\n", + " :Attribute Information:\n", + " \t\t- Alcohol\n", + " \t\t- Malic acid\n", + " \t\t- Ash\n", + "\t\t- Alcalinity of ash \n", + " \t\t- Magnesium\n", + "\t\t- Total phenols\n", + " \t\t- Flavanoids\n", + " \t\t- Nonflavanoid phenols\n", + " \t\t- Proanthocyanins\n", + "\t\t- Color intensity\n", + " \t\t- Hue\n", + " \t\t- OD280/OD315 of diluted wines\n", + " \t\t- Proline\n", + "\n", + " - class:\n", + " - class_0\n", + " - class_1\n", + " - class_2\n", + "\t\t\n", + " :Summary Statistics:\n", + " \n", + " ============================= ==== ===== ======= =====\n", + " Min Max Mean SD\n", + " ============================= ==== ===== ======= =====\n", + " Alcohol: 11.0 14.8 13.0 0.8\n", + " Malic Acid: 0.74 5.80 2.34 1.12\n", + " Ash: 1.36 3.23 2.36 0.27\n", + " Alcalinity of Ash: 10.6 30.0 19.5 3.3\n", + " Magnesium: 70.0 162.0 99.7 14.3\n", + " Total Phenols: 0.98 3.88 2.29 0.63\n", + " Flavanoids: 0.34 5.08 2.03 1.00\n", + " 
Nonflavanoid Phenols: 0.13 0.66 0.36 0.12\n", + " Proanthocyanins: 0.41 3.58 1.59 0.57\n", + " Colour Intensity: 1.3 13.0 5.1 2.3\n", + " Hue: 0.48 1.71 0.96 0.23\n", + " OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71\n", + " Proline: 278 1680 746 315\n", + " ============================= ==== ===== ======= =====\n", + "\n", + " :Missing Attribute Values: None\n", + " :Class Distribution: class_0 (59), class_1 (71), class_2 (48)\n", + " :Creator: R.A. Fisher\n", + " :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", + " :Date: July, 1988\n", + "\n", + "This is a copy of UCI ML Wine recognition datasets.\n", + "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n", + "\n", + "The data is the results of a chemical analysis of wines grown in the same\n", + "region in Italy by three different cultivators. There are thirteen different\n", + "measurements taken for different constituents found in the three types of\n", + "wine.\n", + "\n", + "Original Owners: \n", + "\n", + "Forina, M. et al, PARVUS - \n", + "An Extendible Package for Data Exploration, Classification and Correlation. \n", + "Institute of Pharmaceutical and Food Analysis and Technologies,\n", + "Via Brigata Salerno, 16147 Genoa, Italy.\n", + "\n", + "Citation:\n", + "\n", + "Lichman, M. (2013). UCI Machine Learning Repository\n", + "[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,\n", + "School of Information and Computer Science. \n", + "\n", + ".. topic:: References\n", + "\n", + " (1) S. Aeberhard, D. Coomans and O. de Vel, \n", + " Comparison of Classifiers in High Dimensional Settings, \n", + " Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. \n", + " (Also submitted to Technometrics). \n", + "\n", + " The data was used with many others for comparing various \n", + " classifiers. 
The classes are separable, though only RDA \n", + " has achieved 100% correct classification. \n", + " (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) \n", + " (All results using the leave-one-out technique) \n", + "\n", + " (2) S. Aeberhard, D. Coomans and O. de Vel, \n", + " \"THE CLASSIFICATION PERFORMANCE OF RDA\" \n", + " Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. \n", + " (Also submitted to Journal of Chemometrics).\n", + "\n" + ] + } + ], + "source": [ + "#описание и названия признаков в датасете. Описание нужно вывести в виде привычного, аккуратно \n", + "#оформленного текста, без обозначений переноса строки, но с самими переносами и т. д.\n", + "for line in data.DESCR.split('\\n'):\n", + " print(line)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RqPda_RJvpvw", + "outputId": "6b4daa2c-1010-430d-e792-01851c0ec438" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. 
_wine_dataset:\n", + "\n", + "Wine recognition dataset\n", + "------------------------\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 178 (50 in each of three classes)\n", + " :Number of Attributes: 13 numeric, predictive attributes and the class\n", + " :Attribute Information:\n", + " \t\t- Alcohol\n", + " \t\t- Malic acid\n", + " \t\t- Ash\n", + "\t\t- Alcalinity of ash \n", + " \t\t- Magnesium\n", + "\t\t- Total phenols\n", + " \t\t- Flavanoids\n", + " \t\t- Nonflavanoid phenols\n", + " \t\t- Proanthocyanins\n", + "\t\t- Color intensity\n", + " \t\t- Hue\n", + " \t\t- OD280/OD315 of diluted wines\n", + " \t\t- Proline\n", + "\n", + " - class:\n", + " - class_0\n", + " - class_1\n", + " - class_2\n", + "\t\t\n", + " :Summary Statistics:\n", + " \n", + " ============================= ==== ===== ======= =====\n", + " Min Max Mean SD\n", + " ============================= ==== ===== ======= =====\n", + " Alcohol: 11.0 14.8 13.0 0.8\n", + " Malic Acid: 0.74 5.80 2.34 1.12\n", + " Ash: 1.36 3.23 2.36 0.27\n", + " Alcalinity of Ash: 10.6 30.0 19.5 3.3\n", + " Magnesium: 70.0 162.0 99.7 14.3\n", + " Total Phenols: 0.98 3.88 2.29 0.63\n", + " Flavanoids: 0.34 5.08 2.03 1.00\n", + " Nonflavanoid Phenols: 0.13 0.66 0.36 0.12\n", + " Proanthocyanins: 0.41 3.58 1.59 0.57\n", + " Colour Intensity: 1.3 13.0 5.1 2.3\n", + " Hue: 0.48 1.71 0.96 0.23\n", + " OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71\n", + " Proline: 278 1680 746 315\n", + " ============================= ==== ===== ======= =====\n", + "\n", + " :Missing Attribute Values: None\n", + " :Class Distribution: class_0 (59), class_1 (71), class_2 (48)\n", + " :Creator: R.A. 
Fisher\n", + " :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", + " :Date: July, 1988\n", + "\n", + "This is a copy of UCI ML Wine recognition datasets.\n", + "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n", + "\n", + "The data is the results of a chemical analysis of wines grown in the same\n", + "region in Italy by three different cultivators. There are thirteen different\n", + "measurements taken for different constituents found in the three types of\n", + "wine.\n", + "\n", + "Original Owners: \n", + "\n", + "Forina, M. et al, PARVUS - \n", + "An Extendible Package for Data Exploration, Classification and Correlation. \n", + "Institute of Pharmaceutical and Food Analysis and Technologies,\n", + "Via Brigata Salerno, 16147 Genoa, Italy.\n", + "\n", + "Citation:\n", + "\n", + "Lichman, M. (2013). UCI Machine Learning Repository\n", + "[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,\n", + "School of Information and Computer Science. \n", + "\n", + ".. topic:: References\n", + "\n", + " (1) S. Aeberhard, D. Coomans and O. de Vel, \n", + " Comparison of Classifiers in High Dimensional Settings, \n", + " Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. \n", + " (Also submitted to Technometrics). \n", + "\n", + " The data was used with many others for comparing various \n", + " classifiers. The classes are separable, though only RDA \n", + " has achieved 100% correct classification. \n", + " (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) \n", + " (All results using the leave-one-out technique) \n", + "\n", + " (2) S. Aeberhard, D. Coomans and O. de Vel, \n", + " \"THE CLASSIFICATION PERFORMANCE OF RDA\" \n", + " Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. 
\n", + " (Also submitted to Journal of Chemometrics).\n", + "\n" + ] + } + ], + "source": [ + "print(data[\"DESCR\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1T2sgtWDvpvx", + "outputId": "62213bec-7b91-4863-afe3-7a5c856dc3c5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(3,)" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Сколько классов содержит целевая переменная датасета? \n", + "np.unique(data[\"target\"]).shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dG980xw9vpvy", + "outputId": "dfe35300-d2bc-4e49-ac5e-cbdc7f5f9139" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['class_0', 'class_1', 'class_2'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0
\n", + "" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " B LSTAT \n", + "0 3.92 1065.0 \n", + "1 3.40 1050.0 \n", + "2 3.17 1185.0 \n", + "3 3.45 1480.0 \n", + "4 2.93 735.0 " + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#На основе данных датасета (они содержатся в двумерном массиве Numpy) \n", + "#и названий признаков создайте датафрейм под названием X.\n", + "X = pd.DataFrame(data.data, columns=feature_names)\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nxfO8Ye4vpv2", + "outputId": "5c339ce1-ee73-406a-995a-5533b35664b2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 13 columns):\n", + "CRIM 178 non-null float64\n", + "ZN 178 non-null float64\n", + "INDUS 178 non-null float64\n", + "CHAS 178 non-null float64\n", + "NOX 178 non-null float64\n", + "RM 178 non-null float64\n", + "AGE 178 non-null float64\n", + "DIS 178 non-null float64\n", + "RAD 178 non-null float64\n", + "TAX 178 non-null float64\n", + "PTRATIO 178 non-null float64\n", + "B 178 non-null float64\n", + "LSTAT 178 non-null float64\n", + "dtypes: float64(13)\n", + "memory usage: 18.2 KB\n" + ] + } + ], + "source": [ + "#Выясните размер датафрейма X и установите, имеются ли в нем пропущенные значения.\n", + "X.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jQpYBv84vpv3", + "outputId": "11777d55-5920-4c5d-b941-428ca2421b23" + }, + "outputs": [ + { + 
"data": { + "text/plain": [ + "(178, 13)" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_CBcXjLQvpv5", + "outputId": "d265d9ab-340e-437e-9c8e-b40fa6996201" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "CRIM 0\n", + "ZN 0\n", + "INDUS 0\n", + "CHAS 0\n", + "NOX 0\n", + "RM 0\n", + "AGE 0\n", + "DIS 0\n", + "RAD 0\n", + "TAX 0\n", + "PTRATIO 0\n", + "B 0\n", + "LSTAT 0\n", + "dtype: int64" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.isnull().astype(\"int\").sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7BfndPdnvpv6", + "outputId": "c8975ee0-dfb2-470f-974d-cd18a80b6926" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 14 columns):\n", + "CRIM 178 non-null float64\n", + "ZN 178 non-null float64\n", + "INDUS 178 non-null float64\n", + "CHAS 178 non-null float64\n", + "NOX 178 non-null float64\n", + "RM 178 non-null float64\n", + "AGE 178 non-null float64\n", + "DIS 178 non-null float64\n", + "RAD 178 non-null float64\n", + "TAX 178 non-null float64\n", + "PTRATIO 178 non-null float64\n", + "B 178 non-null float64\n", + "LSTAT 178 non-null float64\n", + "target 178 non-null int64\n", + "dtypes: float64(13), int64(1)\n", + "memory usage: 19.5 KB\n" + ] + } + ], + "source": [ + "#Добавьте в датафрейм поле с классами вин в виде чисел, имеющих тип данных numpy.int64. Название поля - 'target'.\n", + "X[\"target\"]=data[\"target\"].astype(np.int64)\n", + "X.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zvaWeMYrvpv8", + "outputId": "e32d2f61-8c6d-45b4-f0bc-76da468d1e65" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATtarget
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " B LSTAT target \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 " + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ik_Jy89Qvpv9", + "outputId": "c89a2f16-a147-40b6-b5f7-c4e3b0ceeab7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATtarget
CRIM1.0000000.0943970.211545-0.3102350.2707980.2891010.236815-0.1559290.1366980.546364-0.0717470.0723430.643720-0.328222
ZN0.0943971.0000000.1640450.288500-0.054575-0.335167-0.4110070.292977-0.2207460.248985-0.561296-0.368710-0.1920110.437776
INDUS0.2115450.1640451.0000000.4433670.2865870.1289800.1150770.1862300.0096520.258887-0.0746670.0039110.223626-0.049643
CHAS-0.3102350.2885000.4433671.000000-0.083333-0.321113-0.3513700.361922-0.1973270.018732-0.273955-0.276769-0.4405970.517859
NOX0.270798-0.0545750.286587-0.0833331.0000000.2144010.195784-0.2562940.2364410.1999500.0553980.0660040.393351-0.209179
RM0.289101-0.3351670.128980-0.3211130.2144011.0000000.864564-0.4499350.612413-0.0551360.4336810.6999490.498115-0.719163
AGE0.236815-0.4110070.115077-0.3513700.1957840.8645641.000000-0.5379000.652692-0.1723790.5434790.7871940.494193-0.847498
DIS-0.1559290.2929770.1862300.361922-0.256294-0.449935-0.5379001.000000-0.3658450.139057-0.262640-0.503270-0.3113850.489109
RAD0.136698-0.2207460.009652-0.1973270.2364410.6124130.652692-0.3658451.000000-0.0252500.2955440.5190670.330417-0.499130
TAX0.5463640.2489850.2588870.0187320.199950-0.055136-0.1723790.139057-0.0252501.000000-0.521813-0.4288150.3161000.265668
PTRATIO-0.071747-0.561296-0.074667-0.2739550.0553980.4336810.543479-0.2626400.295544-0.5218131.0000000.5654680.236183-0.617369
B0.072343-0.3687100.003911-0.2767690.0660040.6999490.787194-0.5032700.519067-0.4288150.5654681.0000000.312761-0.788230
LSTAT0.643720-0.1920110.223626-0.4405970.3933510.4981150.494193-0.3113850.3304170.3161000.2361830.3127611.000000-0.633717
target-0.3282220.437776-0.0496430.517859-0.209179-0.719163-0.8474980.489109-0.4991300.265668-0.617369-0.788230-0.6337171.000000
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE \\\n", + "CRIM 1.000000 0.094397 0.211545 -0.310235 0.270798 0.289101 0.236815 \n", + "ZN 0.094397 1.000000 0.164045 0.288500 -0.054575 -0.335167 -0.411007 \n", + "INDUS 0.211545 0.164045 1.000000 0.443367 0.286587 0.128980 0.115077 \n", + "CHAS -0.310235 0.288500 0.443367 1.000000 -0.083333 -0.321113 -0.351370 \n", + "NOX 0.270798 -0.054575 0.286587 -0.083333 1.000000 0.214401 0.195784 \n", + "RM 0.289101 -0.335167 0.128980 -0.321113 0.214401 1.000000 0.864564 \n", + "AGE 0.236815 -0.411007 0.115077 -0.351370 0.195784 0.864564 1.000000 \n", + "DIS -0.155929 0.292977 0.186230 0.361922 -0.256294 -0.449935 -0.537900 \n", + "RAD 0.136698 -0.220746 0.009652 -0.197327 0.236441 0.612413 0.652692 \n", + "TAX 0.546364 0.248985 0.258887 0.018732 0.199950 -0.055136 -0.172379 \n", + "PTRATIO -0.071747 -0.561296 -0.074667 -0.273955 0.055398 0.433681 0.543479 \n", + "B 0.072343 -0.368710 0.003911 -0.276769 0.066004 0.699949 0.787194 \n", + "LSTAT 0.643720 -0.192011 0.223626 -0.440597 0.393351 0.498115 0.494193 \n", + "target -0.328222 0.437776 -0.049643 0.517859 -0.209179 -0.719163 -0.847498 \n", + "\n", + " DIS RAD TAX PTRATIO B LSTAT target \n", + "CRIM -0.155929 0.136698 0.546364 -0.071747 0.072343 0.643720 -0.328222 \n", + "ZN 0.292977 -0.220746 0.248985 -0.561296 -0.368710 -0.192011 0.437776 \n", + "INDUS 0.186230 0.009652 0.258887 -0.074667 0.003911 0.223626 -0.049643 \n", + "CHAS 0.361922 -0.197327 0.018732 -0.273955 -0.276769 -0.440597 0.517859 \n", + "NOX -0.256294 0.236441 0.199950 0.055398 0.066004 0.393351 -0.209179 \n", + "RM -0.449935 0.612413 -0.055136 0.433681 0.699949 0.498115 -0.719163 \n", + "AGE -0.537900 0.652692 -0.172379 0.543479 0.787194 0.494193 -0.847498 \n", + "DIS 1.000000 -0.365845 0.139057 -0.262640 -0.503270 -0.311385 0.489109 \n", + "RAD -0.365845 1.000000 -0.025250 0.295544 0.519067 0.330417 -0.499130 \n", + "TAX 0.139057 -0.025250 1.000000 -0.521813 -0.428815 0.316100 0.265668 \n", + 
"PTRATIO -0.262640 0.295544 -0.521813 1.000000 0.565468 0.236183 -0.617369 \n", + "B -0.503270 0.519067 -0.428815 0.565468 1.000000 0.312761 -0.788230 \n", + "LSTAT -0.311385 0.330417 0.316100 0.236183 0.312761 1.000000 -0.633717 \n", + "target 0.489109 -0.499130 0.265668 -0.617369 -0.788230 -0.633717 1.000000 " + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Постройте матрицу корреляций для всех полей X. Дайте полученному датафрейму название X_corr.\n", + "X_corr=X.corr()\n", + "X_corr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ufH2XtFcvpv-", + "outputId": "c2d64419-a780-484b-9085-f2bc0865e8f4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['CHAS', 'RM', 'AGE', 'PTRATIO', 'B', 'LSTAT']" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Создайте список high_corr из признаков, корреляция которых с полем target по абсолютному \n", + "#значению превышает 0.5 (причем, само поле target не должно входить в этот список).\n", + "high_corr=X_corr[\"target\"]\n", + "high_corr=high_corr[np.abs(high_corr)>0.5].drop(\"target\", axis=0)\n", + "high_corr=list(high_corr.index)\n", + "high_corr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VC3e1oB3vpwA", + "outputId": "fc199182-3bc2-4172-f1a3-041e00999647" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " B LSTAT \n", + "0 3.92 1065.0 \n", + "1 3.40 1050.0 \n", + "2 3.17 1185.0 \n", + "3 3.45 1480.0 \n", + "4 2.93 735.0 " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Удалите из датафрейма X поле с целевой переменной. \n", + "X=X.drop(\"target\", axis=1)\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AvKFv4FzvpwB", + "outputId": "c004b4e6-d4f1-46f3-f358-54f9a166e6fd" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATCHAS_2RM_2AGE_2PTRATIO_2B_2LSTAT_2
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0243.367.84009.36361.081615.36641134225.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0125.447.02257.61761.102511.56001102500.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0345.967.840010.49761.060910.04891404225.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0282.2414.822512.18010.739611.90252190400.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0441.007.84007.23611.08168.5849540225.0
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 3.06 0.28 2.29 5.64 1.04 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 2.76 0.26 1.28 4.38 1.05 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 3.24 0.30 2.81 5.68 1.03 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 3.49 0.24 2.18 7.80 0.86 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " B LSTAT CHAS_2 RM_2 AGE_2 PTRATIO_2 B_2 LSTAT_2 \n", + "0 3.92 1065.0 243.36 7.8400 9.3636 1.0816 15.3664 1134225.0 \n", + "1 3.40 1050.0 125.44 7.0225 7.6176 1.1025 11.5600 1102500.0 \n", + "2 3.17 1185.0 345.96 7.8400 10.4976 1.0609 10.0489 1404225.0 \n", + "3 3.45 1480.0 282.24 14.8225 12.1801 0.7396 11.9025 2190400.0 \n", + "4 2.93 735.0 441.00 7.8400 7.2361 1.0816 8.5849 540225.0 " + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Для всех признаков, названия которых содержатся в списке high_corr, вычислите квадрат их \n", + "#значений и добавьте в датафрейм X соответствующие поля с суффиксом '_2', добавленного к \n", + "#первоначальному названию признака. Итоговый датафрейм должен содержать все поля, которые, \n", + "#были в нем изначально, а также поля с признаками из списка high_corr, возведенными в квадрат. \n", + "for i in high_corr:\n", + " X[i+\"_2\"]=X[i]**2\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4DVqJzT8vpwC", + "outputId": "991a6101-6477-46fa-e5bf-be5ea4706229" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATCHAS_2RM_2AGE_2PTRATIO_2B_2LSTAT_2
count178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.0000001.780000e+02
mean13.0006182.3363482.36651719.49494499.7415732.2951122.0292700.3618541.5908995.0580900.9574492.611685746.893258391.1428655.6570305.1100490.9686617.3221556.564591e+05
std0.8118271.1171460.2743443.33956414.2824840.6258510.9988590.1244530.5723592.3182860.2285720.709990314.907474133.6717752.9362944.2114410.4437983.5843165.558591e+05
min11.0300000.7400001.36000010.60000070.0000000.9800000.3400000.1300000.4100001.2800000.4800001.270000278.000000112.3600000.9604000.1156000.2304001.6129007.728400e+04
25%12.3625001.6025002.21000017.20000088.0000001.7425001.2050000.2700001.2500003.2200000.7825001.937500500.500000295.8400003.0363251.4521000.6123253.7540752.505010e+05
50%13.0500001.8650002.36000019.50000098.0000002.3550002.1350000.3400001.5550004.6900000.9650002.780000673.500000380.2500005.5460504.5582500.9312507.7284004.536045e+05
75%13.6775003.0825002.55750021.500000107.0000002.8000002.8750000.4375001.9500006.2000001.1200003.170000985.000000462.2500007.8400008.2657001.25440010.0489009.702250e+05
max14.8300005.8000003.23000030.000000162.0000003.8800005.0800000.6600003.58000013.0000001.7100004.0000001680.000000900.00000015.05440025.8064002.92410016.0000002.822400e+06
\n", + "
" + ], + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM \\\n", + "count 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 \n", + "mean 13.000618 2.336348 2.366517 19.494944 99.741573 2.295112 \n", + "std 0.811827 1.117146 0.274344 3.339564 14.282484 0.625851 \n", + "min 11.030000 0.740000 1.360000 10.600000 70.000000 0.980000 \n", + "25% 12.362500 1.602500 2.210000 17.200000 88.000000 1.742500 \n", + "50% 13.050000 1.865000 2.360000 19.500000 98.000000 2.355000 \n", + "75% 13.677500 3.082500 2.557500 21.500000 107.000000 2.800000 \n", + "max 14.830000 5.800000 3.230000 30.000000 162.000000 3.880000 \n", + "\n", + " AGE DIS RAD TAX PTRATIO B \\\n", + "count 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 \n", + "mean 2.029270 0.361854 1.590899 5.058090 0.957449 2.611685 \n", + "std 0.998859 0.124453 0.572359 2.318286 0.228572 0.709990 \n", + "min 0.340000 0.130000 0.410000 1.280000 0.480000 1.270000 \n", + "25% 1.205000 0.270000 1.250000 3.220000 0.782500 1.937500 \n", + "50% 2.135000 0.340000 1.555000 4.690000 0.965000 2.780000 \n", + "75% 2.875000 0.437500 1.950000 6.200000 1.120000 3.170000 \n", + "max 5.080000 0.660000 3.580000 13.000000 1.710000 4.000000 \n", + "\n", + " LSTAT CHAS_2 RM_2 AGE_2 PTRATIO_2 \\\n", + "count 178.000000 178.000000 178.000000 178.000000 178.000000 \n", + "mean 746.893258 391.142865 5.657030 5.110049 0.968661 \n", + "std 314.907474 133.671775 2.936294 4.211441 0.443798 \n", + "min 278.000000 112.360000 0.960400 0.115600 0.230400 \n", + "25% 500.500000 295.840000 3.036325 1.452100 0.612325 \n", + "50% 673.500000 380.250000 5.546050 4.558250 0.931250 \n", + "75% 985.000000 462.250000 7.840000 8.265700 1.254400 \n", + "max 1680.000000 900.000000 15.054400 25.806400 2.924100 \n", + "\n", + " B_2 LSTAT_2 \n", + "count 178.000000 1.780000e+02 \n", + "mean 7.322155 6.564591e+05 \n", + "std 3.584316 5.558591e+05 \n", + "min 1.612900 7.728400e+04 \n", + "25% 3.754075 2.505010e+05 \n", + "50% 7.728400 
4.536045e+05 \n", + "75% 10.048900 9.702250e+05 \n", + "max 16.000000 2.822400e+06 " + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Выведите описание полей датафрейма X с помощью метода describe.\n", + "X.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XVom2-ZWvpwD" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "dz_analitic3.ipynb\"", + "provenance": [ + { + "file_id": "1zczSw0aBbnW4AOv5ODmOWuQ72F8HZjxr", + "timestamp": 1645102884965 + } + ] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}