Upload 6 files
- Amazon.pkl +3 -0
- Derin Öğrenme Regresyon ile Amazon İçin Önerilen Sistemi - Recommended System for Amazon with Deep Learning Regression.ipynb +1414 -0
- app.py +49 -0
- requirements.txt +4 -0
- test.csv +0 -0
- train.csv +0 -0
Amazon.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b22720d0a2e6578e9c00aea0f4f135afa058dbe00923dacecec97a5867153a1
+size 51023412
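Amazon.pkl is tracked with Git LFS, so the repository only stores this pointer. A minimal sketch of fetching and opening the real file locally (assuming git-lfs is installed; what exactly the notebook pickled into it is an assumption, not stated in this diff):

```python
# After cloning, replace the LFS pointer with the real ~51 MB file:
#   git lfs install
#   git lfs pull
import pickle

with open("Amazon.pkl", "rb") as f:
    obj = pickle.load(f)  # whatever object the notebook serialized (assumption)

print(type(obj))
```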
Derin Öğrenme Regresyon ile Amazon İçin Önerilen Sistemi - Recommended System for Amazon with Deep Learning Regression.ipynb
ADDED
@@ -0,0 +1,1414 @@
# Derin Öğrenme Regresyon ile Amazon İçin Önerilen Sistemi - Recommended System for Amazon with Deep Learning Regression

Each comment line belongs to the code above it; comments were written first in Turkish and then in English.

This project aims to develop a regression model that analyzes users' purchasing behavior on the Amazon platform and provides personalized recommendations. Various data such as users' past shopping records, product features, and user profiles are used to predict products that may interest each user. Supported by deep learning techniques, the recommendation system aims to deliver more effective and accurate suggestions in order to improve the user experience and increase sales, and to strengthen data-driven decision-making for a competitive advantage in the e-commerce sector.

Source: https://www.kaggle.com/competitions/recommended-system-for-amazon-icl-2021
```python
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import json
import re
import pickle

import warnings
warnings.filterwarnings('ignore')

from scipy import spatial  # spatial distance computations (e.g., cosine distance)

from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Flatten, LeakyReLU
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
```
```python
df = pd.read_csv('train.csv')
```

```python
print(df.head())
```
```
           userId   productId  Rating
0   AOPE42H34R0EC  B00000DM9W     5.0
1  A1GI09JC6L0NF7  B00004SABJ     4.0
2   AZLZII4AFX56R  B00000J579     3.0
3  A34AHNT6GD9FWW  9888002198     5.0
4  A2PXRAO5C1XTLW  0972683275     5.0
```

```python
print(df.sample(5))
```
```
               userId   productId  Rating
17343  A2KFK3WT8VAJC5  B00004SPUN     5.0
20627  A1DQGI584UAKRI  B00001WRSJ     5.0
2410   A3R3A8D3D9JVWU  B00000J3H5     5.0
15640  A228FCKXMDRW59  B00000J434     5.0
7515   A1R7POV8N6O5MZ  B00004R8V6     5.0
```

```python
print(df.tail())
```
```
               userId   productId  Rating
33970  A22GYGQ14GHSCD  B00004TDLD     5.0
33971  A2O30HQWWYD5FH  B000038ABH     5.0
33972  A37OTRJO1NM63H  B00003ETSJ     1.0
33973  A2SLR2VUDUGCQM  B00004TDLD     5.0
33974  A339ZN69W7N8PW  B00001P4XH     4.0
```

```python
print(df.shape)
```
```
(33975, 3)
```

```python
print(df.info())
```
```
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33975 entries, 0 to 33974
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   userId     33975 non-null  object 
 1   productId  33975 non-null  object 
 2   Rating     33975 non-null  float64
dtypes: float64(1), object(2)
memory usage: 796.4+ KB
None
```

```python
print(df.isnull().sum().sort_values(ascending=True))
```
```
userId       0
productId    0
Rating       0
dtype: int64
```
```python
df.iloc[195]
```
```
userId       A38YWVKHDGWXFF
productId        B00004T8R2
Rating                  5.0
Name: 195, dtype: object
```

```python
df.iloc[241]
```
```
userId       A3B2X7BT9UCAR3
productId        B00000J40W
Rating                  4.0
Name: 241, dtype: object
```

```python
df['Rating'] = df['Rating'].astype(int)
```

```python
df.Rating.value_counts()
```
```
Rating
5    19004
4     6958
1     3492
3     2659
2     1862
Name: count, dtype: int64
```
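The counts above show a strong skew toward 5-star ratings. The notebook imports seaborn and matplotlib but does not plot this distribution here; a small sketch of how it could be visualized (the plot is an addition, not part of the notebook):

```python
import seaborn as sns
import matplotlib.pyplot as plt

# Bar chart of how many reviews fall in each rating value (1-5).
sns.countplot(x='Rating', data=df)
plt.title('Rating distribution')
plt.show()
```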
## Popülerliğe Dayalı - Popularity Based

```python
product_grouped = df.groupby('productId').agg({'Rating': [np.size, np.sum, np.mean]})
# Group the ratings by 'productId' and compute three statistics on 'Rating':
#   np.size -> number of ratings (count),
#   np.sum  -> total rating score,
#   np.mean -> average rating score.
```

```python
product_grouped
```
```
            Rating               
              size  sum      mean
productId                        
0528881469      18   48  2.666667
0594033926      10   44  4.400000
0594451647      12   51  4.250000
0594481813      28  118  4.214286
0594481902      12   52  4.333333
...            ...  ...       ...
B00004THPR      10   43  4.300000
B00004THQ0      19   67  3.526316
B00004THQ5      18   67  3.722222
B00004TIZS      13   48  3.692308
B00004TJ0L      18   81  4.500000

[737 rows x 3 columns]
```

```python
populer_product = product_grouped.sort_values(('Rating', 'mean'), ascending=False)
# Sort product_grouped by the mean rating in descending order,
# bringing the products with the highest average rating to the top.
```

```python
populer_product.head()
```
```
            Rating         
              size sum mean
productId                  
B00000J3NE      17  85  5.0
B00004S54K       8  40  5.0
B00002EQBU       7  35  5.0
B00002NAXD      12  60  5.0
B00000JDFI      11  55  5.0
```

```python
grouped_sum = product_grouped['Rating']['sum'].sum()
# Sum the 'sum' column of 'Rating', i.e. the total rating score across all products.
```

```python
populer_product['percentage'] = product_grouped['Rating']['sum'].div(grouped_sum) * 100
# Divide each product's total rating score by grouped_sum (the overall total)
# and multiply by 100 to get each product's share of the total rating score.
```

```python
populer_product.head()
```
```
            Rating           percentage
              size sum mean            
productId                              
B00000J3NE      17  85  5.0    0.061574
B00004S54K       8  40  5.0    0.028976
B00002EQBU       7  35  5.0    0.025354
B00002NAXD      12  60  5.0    0.043464
B00000JDFI      11  55  5.0    0.039842
```

```python
populer_product = populer_product.sort_values('percentage', ascending=False)
# Sort by 'percentage' in descending order so the products with the highest
# share of the total rating score come first.
```

```python
populer_product
```
```
            Rating                 percentage
              size   sum      mean           
productId                                    
B00001P4ZH    1682  7486  4.450654   5.422869
B00001WRSJ    1280  5897  4.607031   4.271795
B00004T8R2    1379  5880  4.263959   4.259481
0972683275     824  3701  4.491505   2.681010
B00004SABB     796  3200  4.020101   2.318085
...            ...   ...       ...        ...
0899336795       9    15  1.666667   0.010866
B00000JH72      10    15  1.500000   0.010866
9269807207       8    14  1.750000   0.010142
B00000J4DT       5     8  1.600000   0.005795
B00000JII9       5     6  1.200000   0.004346

[737 rows x 4 columns]
```

```python
populer_product['Rank'] = populer_product['percentage'].rank(ascending=False)
# Assign each product a rank based on the 'percentage' column, in descending
# order: the product with the highest percentage gets rank 1.
```

```python
populer_product.head()
```
```
            Rating                 percentage  Rank
              size   sum      mean                 
productId                                          
B00001P4ZH    1682  7486  4.450654   5.422869   1.0
B00001WRSJ    1280  5897  4.607031   4.271795   2.0
B00004T8R2    1379  5880  4.263959   4.259481   3.0
0972683275     824  3701  4.491505   2.681010   4.0
B00004SABB     796  3200  4.020101   2.318085   5.0
```
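The ranked table can be used directly as a popularity-based recommender. A minimal sketch (the helper function and its name are illustrative additions, not part of the notebook):

```python
def recommend_popular(n=5):
    # Top-n products by overall popularity (share of the total rating score).
    # Assumes populer_product is the DataFrame built above, already sorted by
    # 'percentage' in descending order and indexed by productId.
    return list(populer_product.head(n).index)

# e.g. recommend_popular(5)
# -> ['B00001P4ZH', 'B00001WRSJ', 'B00004T8R2', '0972683275', 'B00004SABB']
```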
## Öznitelik Mühendisliği - Feature Engineering

### Model - Modelling

```python
df.head(1)
```
```
          userId   productId  Rating
0  AOPE42H34R0EC  B00000DM9W       5
```

```python
x = df.drop('Rating', axis=1)
y = df['Rating']
```

```python
x = pd.get_dummies(x, drop_first=True)
# Convert the categorical variables (userId, productId) into numerical dummy variables.
```

```python
x.fillna(0, inplace=True)
y.fillna(0, inplace=True)
```

```python
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)
# Split the data into training and test sets: 80% for training, 20% for testing.
```
1154 |
+
{
|
1155 |
+
"cell_type": "code",
|
1156 |
+
"execution_count": 31,
|
1157 |
+
"id": "27e59cab",
|
1158 |
+
"metadata": {},
|
1159 |
+
"outputs": [
|
1160 |
+
{
|
1161 |
+
"ename": "MemoryError",
|
1162 |
+
"evalue": "Unable to allocate 6.71 GiB for an array with shape (33119, 27180) and data type float64",
|
1163 |
+
"output_type": "error",
|
1164 |
+
"traceback": [
|
1165 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
1166 |
+
"\u001b[1;31mMemoryError\u001b[0m Traceback (most recent call last)",
|
1167 |
+
"Cell \u001b[1;32mIn[31], line 14\u001b[0m\n\u001b[0;32m 1\u001b[0m scaler \u001b[38;5;241m=\u001b[39m StandardScaler()\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# TR = Avantajları:\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# TR = Negatif ve pozitif değerler içeren verilerde performansı artırabilir.\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;66;03m# TR = Aykırı değerlere karşı daha dayanıklıdır.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;66;03m# EN = Disadvantages:\u001b[39;00m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# EN = Does not fit the data into a specific range (e.g., between 0 and 1).\u001b[39;00m\n\u001b[1;32m---> 14\u001b[0m x_train \u001b[38;5;241m=\u001b[39m scaler\u001b[38;5;241m.\u001b[39mfit_transform(x_train)\n\u001b[0;32m 15\u001b[0m \u001b[38;5;66;03m# TR = Verileri ölçekleyerek, modelin daha hızlı ve etkili öğrenmesini sağlamak için tüm özellikleri aynı aralığa getiriyoruz.\u001b[39;00m\n\u001b[0;32m 16\u001b[0m \u001b[38;5;66;03m# EN = By scaling the data, we bring all the features into the same range to allow the model to learn faster and more effectively.\u001b[39;00m\n\u001b[0;32m 18\u001b[0m x_test \u001b[38;5;241m=\u001b[39m scaler\u001b[38;5;241m.\u001b[39mtransform(x_test)\n",
|
1168 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\_set_output.py:316\u001b[0m, in \u001b[0;36m_wrap_method_output.<locals>.wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m 314\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(f)\n\u001b[0;32m 315\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 316\u001b[0m data_to_wrap \u001b[38;5;241m=\u001b[39m f(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_to_wrap, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[0;32m 318\u001b[0m \u001b[38;5;66;03m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m 319\u001b[0m return_tuple \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 320\u001b[0m _wrap_data_with_container(method, data_to_wrap[\u001b[38;5;241m0\u001b[39m], X, \u001b[38;5;28mself\u001b[39m),\n\u001b[0;32m 321\u001b[0m \u001b[38;5;241m*\u001b[39mdata_to_wrap[\u001b[38;5;241m1\u001b[39m:],\n\u001b[0;32m 322\u001b[0m )\n",
|
1169 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\base.py:1098\u001b[0m, in \u001b[0;36mTransformerMixin.fit_transform\u001b[1;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[0;32m 1083\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m 1084\u001b[0m (\n\u001b[0;32m 1085\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis object (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) has a `transform`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1093\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[0;32m 1094\u001b[0m )\n\u001b[0;32m 1096\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 1097\u001b[0m \u001b[38;5;66;03m# fit method of arity 1 (unsupervised transformation)\u001b[39;00m\n\u001b[1;32m-> 1098\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n\u001b[0;32m 1099\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1100\u001b[0m \u001b[38;5;66;03m# fit method of arity 2 (supervised transformation)\u001b[39;00m\n\u001b[0;32m 1101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n",
|
1170 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\preprocessing\\_data.py:878\u001b[0m, in \u001b[0;36mStandardScaler.fit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 876\u001b[0m \u001b[38;5;66;03m# Reset internal state before fitting\u001b[39;00m\n\u001b[0;32m 877\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reset()\n\u001b[1;32m--> 878\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpartial_fit(X, y, sample_weight)\n",
|
1171 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
1172 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\preprocessing\\_data.py:914\u001b[0m, in \u001b[0;36mStandardScaler.partial_fit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 882\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Online computation of mean and std on X for later scaling.\u001b[39;00m\n\u001b[0;32m 883\u001b[0m \n\u001b[0;32m 884\u001b[0m \u001b[38;5;124;03mAll of X is processed as a single batch. This is intended for cases\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 911\u001b[0m \u001b[38;5;124;03m Fitted scaler.\u001b[39;00m\n\u001b[0;32m 912\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 913\u001b[0m first_call \u001b[38;5;241m=\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_samples_seen_\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 914\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_data(\n\u001b[0;32m 915\u001b[0m X,\n\u001b[0;32m 916\u001b[0m accept_sparse\u001b[38;5;241m=\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcsr\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcsc\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[0;32m 917\u001b[0m dtype\u001b[38;5;241m=\u001b[39mFLOAT_DTYPES,\n\u001b[0;32m 918\u001b[0m force_all_finite\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow-nan\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 919\u001b[0m reset\u001b[38;5;241m=\u001b[39mfirst_call,\n\u001b[0;32m 920\u001b[0m )\n\u001b[0;32m 921\u001b[0m n_features \u001b[38;5;241m=\u001b[39m X\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m 923\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sample_weight \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
1173 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\base.py:633\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[1;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[0;32m 631\u001b[0m out \u001b[38;5;241m=\u001b[39m X, y\n\u001b[0;32m 632\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m no_val_y:\n\u001b[1;32m--> 633\u001b[0m out \u001b[38;5;241m=\u001b[39m check_array(X, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n\u001b[0;32m 634\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_y:\n\u001b[0;32m 635\u001b[0m out \u001b[38;5;241m=\u001b[39m _check_y(y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n",
|
1174 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\validation.py:1012\u001b[0m, in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m 1010\u001b[0m array \u001b[38;5;241m=\u001b[39m xp\u001b[38;5;241m.\u001b[39mastype(array, dtype, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m 1011\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1012\u001b[0m array \u001b[38;5;241m=\u001b[39m _asarray_with_order(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype, xp\u001b[38;5;241m=\u001b[39mxp)\n\u001b[0;32m 1013\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[0;32m 1014\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 1015\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplex data not supported\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(array)\n\u001b[0;32m 1016\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcomplex_warning\u001b[39;00m\n",
|
1175 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\_array_api.py:745\u001b[0m, in \u001b[0;36m_asarray_with_order\u001b[1;34m(array, dtype, order, copy, xp, device)\u001b[0m\n\u001b[0;32m 743\u001b[0m array \u001b[38;5;241m=\u001b[39m numpy\u001b[38;5;241m.\u001b[39marray(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m 744\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 745\u001b[0m array \u001b[38;5;241m=\u001b[39m numpy\u001b[38;5;241m.\u001b[39masarray(array, order\u001b[38;5;241m=\u001b[39morder, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m 747\u001b[0m \u001b[38;5;66;03m# At this point array is a NumPy ndarray. We convert it to an array\u001b[39;00m\n\u001b[0;32m 748\u001b[0m \u001b[38;5;66;03m# container that is consistent with the input's namespace.\u001b[39;00m\n\u001b[0;32m 749\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m xp\u001b[38;5;241m.\u001b[39masarray(array)\n",
|
1176 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\core\\generic.py:2083\u001b[0m, in \u001b[0;36mNDFrame.__array__\u001b[1;34m(self, dtype)\u001b[0m\n\u001b[0;32m 2082\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__array__\u001b[39m(\u001b[38;5;28mself\u001b[39m, dtype: npt\u001b[38;5;241m.\u001b[39mDTypeLike \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m-> 2083\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values\n\u001b[0;32m 2084\u001b[0m arr \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(values, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m 2085\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 2086\u001b[0m astype_is_view(values\u001b[38;5;241m.\u001b[39mdtype, arr\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[0;32m 2087\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write()\n\u001b[0;32m 2088\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mgr\u001b[38;5;241m.\u001b[39mis_single_block\n\u001b[0;32m 2089\u001b[0m ):\n\u001b[0;32m 2090\u001b[0m \u001b[38;5;66;03m# Check if both conversions can be done without a copy\u001b[39;00m\n",
|
1177 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\core\\frame.py:1046\u001b[0m, in \u001b[0;36mDataFrame._values\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1044\u001b[0m blocks \u001b[38;5;241m=\u001b[39m mgr\u001b[38;5;241m.\u001b[39mblocks\n\u001b[0;32m 1045\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(blocks) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m-> 1046\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ensure_wrapped_if_datetimelike(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues)\n\u001b[0;32m 1048\u001b[0m arr \u001b[38;5;241m=\u001b[39m blocks[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m 1049\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 1050\u001b[0m \u001b[38;5;66;03m# non-2D ExtensionArray\u001b[39;00m\n",
|
1178 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\core\\frame.py:12281\u001b[0m, in \u001b[0;36mDataFrame.values\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 12207\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[0;32m 12208\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalues\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[0;32m 12209\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 12210\u001b[0m \u001b[38;5;124;03m Return a Numpy representation of the DataFrame.\u001b[39;00m\n\u001b[0;32m 12211\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12279\u001b[0m \u001b[38;5;124;03m ['monkey', nan, None]], dtype=object)\u001b[39;00m\n\u001b[0;32m 12280\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m> 12281\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mgr\u001b[38;5;241m.\u001b[39mas_array()\n",
|
1179 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:1656\u001b[0m, in \u001b[0;36mBlockManager.as_array\u001b[1;34m(self, dtype, copy, na_value)\u001b[0m\n\u001b[0;32m 1654\u001b[0m arr\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m 1655\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1656\u001b[0m arr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_interleave(dtype\u001b[38;5;241m=\u001b[39mdtype, na_value\u001b[38;5;241m=\u001b[39mna_value)\n\u001b[0;32m 1657\u001b[0m \u001b[38;5;66;03m# The underlying data was copied within _interleave, so no need\u001b[39;00m\n\u001b[0;32m 1658\u001b[0m \u001b[38;5;66;03m# to further copy if copy=True or setting na_value\u001b[39;00m\n\u001b[0;32m 1660\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_value \u001b[38;5;129;01mis\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mno_default:\n",
|
1180 |
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\core\\internals\\managers.py:1689\u001b[0m, in \u001b[0;36mBlockManager._interleave\u001b[1;34m(self, dtype, na_value)\u001b[0m\n\u001b[0;32m 1686\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"ensure_np_dtype\" has incompatible type\u001b[39;00m\n\u001b[0;32m 1687\u001b[0m \u001b[38;5;66;03m# \"Optional[dtype[Any]]\"; expected \"Union[dtype[Any], ExtensionDtype]\"\u001b[39;00m\n\u001b[0;32m 1688\u001b[0m dtype \u001b[38;5;241m=\u001b[39m ensure_np_dtype(dtype) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m-> 1689\u001b[0m result \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mempty(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshape, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[0;32m 1691\u001b[0m itemmask \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mzeros(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m])\n\u001b[0;32m 1693\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtype \u001b[38;5;241m==\u001b[39m np\u001b[38;5;241m.\u001b[39mdtype(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobject\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m na_value \u001b[38;5;129;01mis\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mno_default:\n\u001b[0;32m 1694\u001b[0m \u001b[38;5;66;03m# much more performant than using to_numpy below\u001b[39;00m\n",
|
1181 |
+
"\u001b[1;31mMemoryError\u001b[0m: Unable to allocate 6.71 GiB for an array with shape (33119, 27180) and data type float64"
|
1182 |
+
]
|
1183 |
+
}
|
1184 |
+
],
|
1185 |
+
"source": [
|
1186 |
+
"scaler = StandardScaler()\n",
|
1187 |
+
"# TR = Avantajları:\n",
|
1188 |
+
"# TR = Negatif ve pozitif değerler içeren verilerde performansı artırabilir.\n",
|
1189 |
+
"# TR = Aykırı değerlere karşı daha dayanıklıdır.\n",
|
1190 |
+
"# TR = Dezavantajları:\n",
|
1191 |
+
"# TR = Veriyi kesin bir aralığa sığdırmaz (örneğin, 0 ile 1 arasında).\n",
|
1192 |
+
"\n",
|
1193 |
+
"# EN = Advantages:\n",
|
1194 |
+
"# EN = Can improve performance for data containing both negative and positive values.\n",
|
1195 |
+
"# EN = More robust to outliers.\n",
|
1196 |
+
"# EN = Disadvantages:\n",
|
1197 |
+
"# EN = Does not fit the data into a specific range (e.g., between 0 and 1).\n",
|
1198 |
+
"\n",
|
1199 |
+
"x_train = scaler.fit_transform(x_train)\n",
|
1200 |
+
"# TR = Verileri ölçekleyerek, modelin daha hızlı ve etkili öğrenmesini sağlamak için tüm özellikleri aynı aralığa getiriyoruz.\n",
|
1201 |
+
"# EN = By scaling the data, we bring all the features into the same range to allow the model to learn faster and more effectively.\n",
|
1202 |
+
"\n",
|
1203 |
+
"x_test = scaler.transform(x_test)\n",
|
1204 |
+
"# TR = Test verilerini eğitimde kullanılan aynı ölçekle dönüştürerek modelin test verileri üzerinde doğru tahminler yapmasını sağlar.\n",
|
1205 |
+
"# EN = It enables the model to make accurate predictions on the test data by transforming the test data to the same scale used in training."
|
1206 |
+
]
|
1207 |
+
},
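The MemoryError in the output above comes from StandardScaler densifying the feature matrix: a (33119, 27180) float64 array needs about 6.71 GiB. A minimal sparse-friendly sketch, assuming x_train and x_test are scipy sparse matrices (this depends on how the features were encoded earlier in the notebook):

```python
from sklearn.preprocessing import StandardScaler, MaxAbsScaler

# with_mean=False scales by the standard deviation only, so the sparse matrix
# is never converted to a dense array and the large allocation is avoided.
scaler = StandardScaler(with_mean=False)
# scaler = MaxAbsScaler()  # alternative: scales each feature to [-1, 1], also sparse-safe

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
```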
|
1208 |
+
{
|
1209 |
+
"cell_type": "code",
|
1210 |
+
"execution_count": null,
|
1211 |
+
"id": "947b0cdb",
|
1212 |
+
"metadata": {},
|
1213 |
+
"outputs": [],
|
1214 |
+
"source": [
|
1215 |
+
"model = Sequential()\n",
|
1216 |
+
"# TR = Modelin katmanlarını sırayla eklemek için Sequential kullanıyoruz.\n",
|
1217 |
+
"# EN = We use Sequential to add layers of the model in order.\n",
|
1218 |
+
"\n",
|
1219 |
+
"model.add(Dense(128, input_dim=x_train.shape[1]))\n",
|
1220 |
+
"# TR = 128 nöronlu bir katman ekliyoruz.\n",
|
1221 |
+
"# TR = input_dim=x_train.shape[1] Modelin ilk katmanına giriş verilerinin boyutunu tanımlayarak her özelliğin doğru şekilde işlenmesini sağlar.\n",
|
1222 |
+
"\n",
|
1223 |
+
"# EN = We add a layer with 128 neurons.\n",
|
1224 |
+
"# EN = input_dim=x_train.shape[1] Defines the size of the input data in the first layer of the model, ensuring that each feature is processed correctly.\n",
|
1225 |
+
"\n",
|
1226 |
+
"model.add(Dense(64))\n",
|
1227 |
+
"model.add(Dense(32))\n",
|
1228 |
+
"\n",
|
1229 |
+
"model.add(Flatten())\n",
|
1230 |
+
"\n",
|
1231 |
+
"model.add(Dense(1, activation='linear'))\n",
|
1232 |
+
"# TR = Regresyon görevleri için lineer aktivasyon kullanıyoruz.\n",
|
1233 |
+
"# EN = We use linear activation for regression tasks.\n",
|
1234 |
+
"\n",
|
1235 |
+
"model.compile(loss='mse', optimizer='adam',metrics=['mean_squared_error'])\n",
|
1236 |
+
"# TR = Modeli ortalama kare hata (mse) kaybı ile derliyoruz.\n",
|
1237 |
+
"# EN = We compile the model with mean square error (mse) loss.\n",
|
1238 |
+
"\n",
|
1239 |
+
"early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)\n",
|
1240 |
+
"# TR = EarlyStopping ekleyin: Eğitim sırasında model performansı iyileşmediğinde erken durması için kullanıyoruz.\n",
|
1241 |
+
"# EN = Add EarlyStopping: We use it to stop early when model performance does not improve during training.\n",
|
1242 |
+
"\n",
|
1243 |
+
"# TR = val_loss 10 epoch boyunca iyileşmezse eğitimi durduruyor ve en iyi ağırlıkları geri yüklüyor.\n",
|
1244 |
+
"# EN = If val_loss does not improve for 10 epochs, it stops training and restores the best weights.\n",
|
1245 |
+
"\n",
|
1246 |
+
"history=model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=128, epochs=100, callbacks=[early_stopping])\n",
|
1247 |
+
"# TR = Modeli 100 epoch boyunca eğitiyoruz, fakat EarlyStopping ile durdurulabilir. Batch boyutu 128 olarak belirlenmiş.\n",
|
1248 |
+
"# EN = We train the model for 100 epochs, but it can be stopped with EarlyStopping. Batch size is set to 128."
|
1249 |
+
]
|
1250 |
+
},
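Dense layers default to a linear activation, so a 128 → 64 → 32 → 1 stack without activations is mathematically equivalent to a single linear regression, and Flatten() is a no-op on the 2-D output of a Dense layer. A small variant (not the original cell) that makes the network non-linear, assuming the same scaled x_train and y_train:

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping

model = Sequential([
    Input(shape=(x_train.shape[1],)),   # declare the input size explicitly
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),      # linear output for regression
])
model.compile(loss='mse', optimizer='adam', metrics=['mean_squared_error'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(x_train, y_train, validation_data=(x_test, y_test),
                    batch_size=128, epochs=100, callbacks=[early_stopping])
```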
|
1251 |
+
{
|
1252 |
+
"cell_type": "code",
|
1253 |
+
"execution_count": null,
|
1254 |
+
"id": "d3de4b0a",
|
1255 |
+
"metadata": {},
|
1256 |
+
"outputs": [],
|
1257 |
+
"source": [
|
1258 |
+
"model.summary()"
|
1259 |
+
]
|
1260 |
+
},
|
1261 |
+
{
|
1262 |
+
"cell_type": "code",
|
1263 |
+
"execution_count": null,
|
1264 |
+
"id": "db9170d6",
|
1265 |
+
"metadata": {},
|
1266 |
+
"outputs": [],
|
1267 |
+
"source": [
|
1268 |
+
"test_loss = model.evaluate(x_test, y_test)\n",
|
1269 |
+
"\n",
|
1270 |
+
"# TR = İlk eleman kayıp değerini, ikinci eleman ise hesaplanan ek metriği içerir.\n",
|
1271 |
+
"# EN = The first element contains the loss value, the second contains the additional metric.\n",
|
1272 |
+
"\n",
|
1273 |
+
"print(f\"Test kaybı: {test_loss[0]:.4f}\")\n",
|
1274 |
+
"print(f\"Mean Squared Error: {test_loss[1]:.4f}\")"
|
1275 |
+
]
|
1276 |
+
},
|
1277 |
+
{
|
1278 |
+
"cell_type": "code",
|
1279 |
+
"execution_count": null,
|
1280 |
+
"id": "09c4a1cc",
|
1281 |
+
"metadata": {},
|
1282 |
+
"outputs": [],
|
1283 |
+
"source": [
|
1284 |
+
"pred = model.predict(x_test)\n",
|
1285 |
+
"# TR = modeli x_test ile predict özelliği ile tahmin ettik. predict=tahmin demek. Dahmin edip pred eşitledik.\n",
|
1286 |
+
"# EN = We predicted the model with x_test and the predict feature. predict=means prediction. We guessed and equalized the pred"
|
1287 |
+
]
|
1288 |
+
},
|
1289 |
+
{
|
1290 |
+
"cell_type": "code",
|
1291 |
+
"execution_count": null,
|
1292 |
+
"id": "a1061b4b",
|
1293 |
+
"metadata": {},
|
1294 |
+
"outputs": [],
|
1295 |
+
"source": [
|
1296 |
+
"r2_score(y_test, pred)\n",
|
1297 |
+
"# TR = Bunu gerçek(y_test) değer ile tahmin(pred) edilen değerleri karşılaştır ve arasındaki farkı bul.\n",
|
1298 |
+
"# EN = Compare this with the actual (y_test) value and the predicted (pred) values and find the difference between them."
|
1299 |
+
]
|
1300 |
+
},
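For reference, r2_score reports the proportion of variance in y_test explained by the predictions; the same value can be computed by hand:

```python
import numpy as np

# R^2 = 1 - SS_res / SS_tot, equivalent to sklearn.metrics.r2_score
ss_res = np.sum((y_test - pred.flatten()) ** 2)   # residual sum of squares
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)  # total sum of squares
r2 = 1 - ss_res / ss_tot
```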
|
1301 |
+
{
|
1302 |
+
"cell_type": "code",
|
1303 |
+
"execution_count": null,
|
1304 |
+
"id": "df65a8fe",
|
1305 |
+
"metadata": {},
|
1306 |
+
"outputs": [],
|
1307 |
+
"source": [
|
1308 |
+
"mean_squared_error(y_test,pred)**.5\n",
|
1309 |
+
"# TR = Burada, Root Mean Square Error bulduk. Bunu gerçek(y_test) değer ile tahmin(pred) edilen değerleri karşılaştır arasındaki farkı bul ve **.5 ile karekökünü al.\n",
|
1310 |
+
"# EN = Here, we found Root Mean Square Error. Compare this with the actual (y_test) value and the predicted (pred) values, find the difference and take the square root of **.5."
|
1311 |
+
]
|
1312 |
+
},
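Depending on the scikit-learn version installed, the RMSE can also be obtained without the manual `** .5` (a side note, not part of the original cell):

```python
from sklearn.metrics import mean_squared_error

rmse = mean_squared_error(y_test, pred) ** 0.5            # works on any version
# rmse = mean_squared_error(y_test, pred, squared=False)  # older releases (deprecated in 1.4)
# from sklearn.metrics import root_mean_squared_error     # scikit-learn >= 1.4
# rmse = root_mean_squared_error(y_test, pred)
```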
|
1313 |
+
{
|
1314 |
+
"cell_type": "code",
|
1315 |
+
"execution_count": null,
|
1316 |
+
"id": "65bcd534",
|
1317 |
+
"metadata": {},
|
1318 |
+
"outputs": [],
|
1319 |
+
"source": [
|
1320 |
+
"mean_absolute_error(y_test,pred)\n",
|
1321 |
+
"# TR = mean_absolute_error ile (y_test,pred) kullanarak değerini hesaplama. \n",
|
1322 |
+
"# EN = Calculating the value of mean_absolute_error using (y_test,pred)."
|
1323 |
+
]
|
1324 |
+
},
|
1325 |
+
{
|
1326 |
+
"cell_type": "code",
|
1327 |
+
"execution_count": null,
|
1328 |
+
"id": "a8e8294f",
|
1329 |
+
"metadata": {},
|
1330 |
+
"outputs": [],
|
1331 |
+
"source": [
|
1332 |
+
"residuals=y_test-pred.flatten()"
|
1333 |
+
]
|
1334 |
+
},
|
1335 |
+
{
|
1336 |
+
"cell_type": "code",
|
1337 |
+
"execution_count": null,
|
1338 |
+
"id": "24f6825a",
|
1339 |
+
"metadata": {},
|
1340 |
+
"outputs": [],
|
1341 |
+
"source": [
|
1342 |
+
"sns.kdeplot(x=residuals,fill=True);\n",
|
1343 |
+
"# TR = Veri dağılımını pürüzsüz bir şekilde tahmin etmek için kullanılan bir yoğunluk grafiğidir. Kernel Yoğunluk Tahmini (KDE) ile verinin altında yatan olasılık dağılımını görselleştirir.\n",
|
1344 |
+
"# EN = It is a density plot used to smoothly estimate the distribution of data. It visualizes the underlying probability distribution of data using Kernel Density Estimation (KDE)."
|
1345 |
+
]
|
1346 |
+
},
|
1347 |
+
{
|
1348 |
+
"cell_type": "code",
|
1349 |
+
"execution_count": null,
|
1350 |
+
"id": "1933b4f4",
|
1351 |
+
"metadata": {},
|
1352 |
+
"outputs": [],
|
1353 |
+
"source": [
|
1354 |
+
"loss_f=pd.DataFrame(history.history)"
|
1355 |
+
]
|
1356 |
+
},
|
1357 |
+
{
|
1358 |
+
"cell_type": "code",
|
1359 |
+
"execution_count": null,
|
1360 |
+
"id": "11e12e4d",
|
1361 |
+
"metadata": {},
|
1362 |
+
"outputs": [],
|
1363 |
+
"source": [
|
1364 |
+
"loss_f.plot();"
|
1365 |
+
]
|
1366 |
+
},
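A small optional variant of the same plot with labeled axes, assuming the default metric names recorded by Keras ('loss' and 'val_loss') and a recent pandas version:

```python
ax = loss_f[['loss', 'val_loss']].plot(xlabel='epoch', ylabel='MSE')
```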
|
1367 |
+
{
|
1368 |
+
"cell_type": "code",
|
1369 |
+
"execution_count": null,
|
1370 |
+
"id": "b2ddacf7",
|
1371 |
+
"metadata": {},
|
1372 |
+
"outputs": [],
|
1373 |
+
"source": [
|
1374 |
+
"pickle.dump(model,open('Amazon.pkl','wb'))"
|
1375 |
+
]
|
1376 |
+
}
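Pickling a Keras model works here, but it can be fragile across TensorFlow versions; the native Keras format is the safer option. A hedged alternative (the .keras file name is illustrative and is not what app.py expects):

```python
from tensorflow.keras.models import load_model

model.save('Amazon.keras')            # native Keras format (recent TensorFlow versions)
model = load_model('Amazon.keras')    # reload for inference
```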
|
1377 |
+
],
|
1378 |
+
"metadata": {
|
1379 |
+
"kaggle": {
|
1380 |
+
"accelerator": "none",
|
1381 |
+
"dataSources": [
|
1382 |
+
{
|
1383 |
+
"databundleVersionId": 2671877,
|
1384 |
+
"sourceId": 31137,
|
1385 |
+
"sourceType": "competition"
|
1386 |
+
}
|
1387 |
+
],
|
1388 |
+
"dockerImageVersionId": 30761,
|
1389 |
+
"isGpuEnabled": false,
|
1390 |
+
"isInternetEnabled": true,
|
1391 |
+
"language": "python",
|
1392 |
+
"sourceType": "notebook"
|
1393 |
+
},
|
1394 |
+
"kernelspec": {
|
1395 |
+
"display_name": "Python 3 (ipykernel)",
|
1396 |
+
"language": "python",
|
1397 |
+
"name": "python3"
|
1398 |
+
},
|
1399 |
+
"language_info": {
|
1400 |
+
"codemirror_mode": {
|
1401 |
+
"name": "ipython",
|
1402 |
+
"version": 3
|
1403 |
+
},
|
1404 |
+
"file_extension": ".py",
|
1405 |
+
"mimetype": "text/x-python",
|
1406 |
+
"name": "python",
|
1407 |
+
"nbconvert_exporter": "python",
|
1408 |
+
"pygments_lexer": "ipython3",
|
1409 |
+
"version": "3.12.4"
|
1410 |
+
}
|
1411 |
+
},
|
1412 |
+
"nbformat": 4,
|
1413 |
+
"nbformat_minor": 5
|
1414 |
+
}
|
app.py
ADDED
@@ -0,0 +1,49 @@
1 |
+
# app.py
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Uygulama başlığı
|
7 |
+
st.title('Amazon Ürün Değerlendirme Sistemi')
|
8 |
+
|
9 |
+
# train.csv dosyasını yükle
|
10 |
+
@st.cache_data
def load_data():
    data = pd.read_csv('train.csv')
    return data
|
14 |
+
|
15 |
+
# Amazon.pkl modelini yükle
@st.cache_resource
def load_model():
    with open('Amazon.pkl', 'rb') as f:
        model = pickle.load(f)
    return model
|
21 |
+
|
22 |
+
# Verileri yükleyelim
|
23 |
+
data = load_data()
|
24 |
+
|
25 |
+
# Arayüz - Kullanıcı ve Ürün ID seçimi
|
26 |
+
st.sidebar.header("Kullanıcı ve Ürün Seçimi")
|
27 |
+
|
28 |
+
# Kullanıcı ID ve Ürün ID seçici
|
29 |
+
user_id = st.sidebar.selectbox('Kullanıcı ID Seçiniz:', data['userId'].unique())
|
30 |
+
product_id = st.sidebar.selectbox('Ürün ID Seçiniz:', data['productId'].unique())
|
31 |
+
|
32 |
+
# Kullanıcının daha önce verdiği puanlar
|
33 |
+
user_ratings = data[data['userId'] == user_id]
|
34 |
+
|
35 |
+
st.subheader(f"Kullanıcı {user_id} Tarafından Verilen Puanlar")
|
36 |
+
st.dataframe(user_ratings)
|
37 |
+
|
38 |
+
# Tahmin yapmak için model yükleyelim
|
39 |
+
model = load_model()
|
40 |
+
|
41 |
+
# Ürün puanı tahmini (örnek model kullanımı)
|
42 |
+
if st.button('Puan Tahmini Yap'):
    # NOTE: the model was trained on encoded + scaled features, so the raw IDs below need the
    # same preprocessing before predict (see the sketch after this file).
    rating_prediction = model.predict([[user_id, product_id]])
    st.write(f"Tahmini Puan: {float(rating_prediction[0][0]):.2f}")
|
46 |
+
|
47 |
+
# train.csv'deki ilk 5 kaydı göster
|
48 |
+
st.subheader("İlk 5 Kayıt")
|
49 |
+
st.write(data.head())
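As noted in the button handler above, the network was trained on encoded and scaled features, not on raw userId/productId values, so model.predict([[user_id, product_id]]) will not match the training input. A hypothetical sketch of the required wiring, assuming the fitted encoder and scaler from the notebook were also exported; 'encoder.pkl' and 'scaler.pkl' are illustrative names, not files in this repository, and user_id, product_id, model, and st come from the app above:

```python
import pickle
import pandas as pd

# Hypothetical artifacts: the notebook would need to pickle its fitted encoder
# and scaler for this to work; they are not part of the current upload.
with open('encoder.pkl', 'rb') as f:
    encoder = pickle.load(f)
with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)

row = pd.DataFrame([{'userId': user_id, 'productId': product_id}])
features = scaler.transform(encoder.transform(row))   # same preprocessing as training
rating_prediction = model.predict(features)
st.write(f"Tahmini Puan: {float(rating_prediction[0][0]):.2f}")
```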
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
1 |
+
streamlit
|
2 |
+
scikit-learn
|
3 |
+
pandas
|
4 |
+
tensorflow
|
test.csv
ADDED
The diff for this file is too large to render.
See raw diff
train.csv
ADDED
The diff for this file is too large to render.
See raw diff