MansoorSarookh committed on
Commit
b5d49ea
·
verified ·
1 Parent(s): 5cac1e5

Create preprocessor.py

Browse files
Files changed (1) hide show
  1. preprocessor.py +51 -0
preprocessor.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
# Timestamp prefix that opens every log entry, e.g. "12/25/21, 10:30 PM - ".
# The year may have 2-4 digits; there is no capturing group, so split() and
# findall() return the entry bodies and the matched prefixes respectively.
_ENTRY_PREFIX = re.compile(
    r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{1,2}\s(?:AM|PM)\s-\s'
)

# Leading "<author>: " part of an entry body.  Lazy, so it stops at the first
# colon that is followed by whitespace.
_AUTHOR_PREFIX = re.compile(r'([\w\W]+?):\s')


def _parse_timestamps(stamps):
    """Convert the matched timestamp prefixes into a datetime Series.

    Raises:
        ValueError: if a prefix fits neither the two-digit-year nor the
            four-digit-year month/day/year layout.
    """
    # ``\s`` in the prefix pattern accepts any Unicode whitespace, but the
    # parse formats below contain plain spaces, so normalise first.
    cleaned = pd.Series([re.sub(r'\s', ' ', s) for s in stamps], dtype=object)

    # The prefix pattern admits both short and long years, so try both formats
    # and keep whichever one parsed for each row.
    short_year = pd.to_datetime(
        cleaned, format='%m/%d/%y, %I:%M %p - ', errors='coerce'
    )
    long_year = pd.to_datetime(
        cleaned, format='%m/%d/%Y, %I:%M %p - ', errors='coerce'
    )
    parsed = short_year.fillna(long_year)

    # Keep failing loudly on malformed dates instead of leaking NaT downstream.
    unparsed = parsed.isna()
    if unparsed.any():
        raise ValueError(
            f"timestamp does not match the expected format: "
            f"{cleaned[unparsed].iloc[0]!r}"
        )
    return parsed


def _hour_bucket(hour):
    """Return the one-hour window label ("<start>-<end>") for *hour*."""
    hour = int(hour)
    if hour == 23:
        return '23-00'
    if hour == 0:
        return '00-1'
    return f'{hour}-{hour + 1}'


def prepos(data):
    """Parse a timestamped message log into a DataFrame.

    Each entry in *data* is expected to start with a prefix such as
    ``"12/25/21, 10:30 PM - "`` followed by either ``"<author>: <text>"`` or
    free text with no author.

    Args:
        data: The full log text as a single string.

    Returns:
        A DataFrame with one row per entry and the columns ``dates``,
        ``users``, ``message``, ``only_date``, ``year``, ``month_num``,
        ``month``, ``'day '`` (legacy name, trailing space), ``day_name``,
        ``hour``, ``minute``, ``period`` and ``day``.  Entries without an
        ``"<author>: "`` prefix get the user ``'group_notification'``.

    Raises:
        ValueError: if a matched timestamp cannot be parsed as
            month/day/year with a 12-hour clock.
    """
    bodies = _ENTRY_PREFIX.split(data)[1:]  # [0] is the text before the first entry
    stamps = _ENTRY_PREFIX.findall(data)

    df = pd.DataFrame({'dates': _parse_timestamps(stamps)})

    users = []
    messages = []
    for body in bodies:
        # maxsplit=1: only the first "<author>: " is a separator; any later
        # ": " belongs to the message text and must not truncate it.
        parts = _AUTHOR_PREFIX.split(body, maxsplit=1)
        if len(parts) > 1:
            users.append(parts[1])
            messages.append(parts[2])
        else:
            users.append('group_notification')
            messages.append(parts[0])

    df['users'] = users
    df['message'] = messages

    stamp = df['dates'].dt
    df['only_date'] = stamp.date
    df['year'] = stamp.year
    df['month_num'] = stamp.month
    df['month'] = stamp.month_name()
    # NOTE: the trailing space in 'day ' is kept so existing callers that use
    # this exact key keep working; see the 'day' alias added below.
    df['day '] = stamp.day
    df['day_name'] = stamp.day_name()
    df['hour'] = stamp.hour
    df['minute'] = stamp.minute

    df['period'] = [_hour_bucket(hour) for hour in df['hour']]

    # Correctly named alias for 'day ', appended last so the position of the
    # pre-existing columns is unchanged.
    df['day'] = df['day ']
    return df