Roberta2024 committed (verified)
Commit 53a8979 · 1 Parent(s): 95ff3c3

Update app.py

Files changed (1): app.py (+48 -29)
app.py CHANGED
@@ -2,9 +2,9 @@ import streamlit as st
 import requests
 from bs4 import BeautifulSoup
 import pandas as pd
-import base64 # This is a built-in module, no need to install
+import base64
+import plotly.express as px

-# Function to set background image
 def set_background(image_file):
     with open(image_file, "rb") as image_file:
         encoded_string = base64.b64encode(image_file.read()).decode()
@@ -14,77 +14,99 @@ def set_background(image_file):
         .stApp {{
             background-image: url(data:image/png;base64,{encoded_string});
             background-size: cover;
+            background-position: center top;
+            padding-top: 100px;
         }}
         </style>
         """,
         unsafe_allow_html=True
     )

-# Set the background image
+# 設定背景圖片
 set_background('ddog.png')

-# Rest of the code remains the same
-# ...
-
-# Streamlit app title
+# Streamlit app 標題
 st.title('寵物診所資訊爬蟲')

 # 網址列表
 urls = [
     'https://www.tw-animal.com/pet/171211/c000196.html',
     'https://www.tw-animal.com/pet/171211/c000186.html',
-    # ... (rest of the URLs)
+    'https://www.tw-animal.com/pet/171211/c000081.html',
+    'https://www.tw-animal.com/pet/171211/c000848.html',
+    'https://www.tw-animal.com/pet/171211/c000045.html',
+    'https://www.tw-animal.com/pet/171211/c001166.html',
+    'https://www.tw-animal.com/pet/171211/c000773.html',
+    'https://www.tw-animal.com/pet/171211/c001038.html',
+    'https://www.tw-animal.com/pet/171211/c000741.html',
+    'https://www.tw-animal.com/pet/171211/c001451.html',
+    'https://www.tw-animal.com/pet/171211/c000102.html',
+    'https://www.tw-animal.com/pet/171211/c000757.html',
+    'https://www.tw-animal.com/pet/171211/c000703.html',
+    'https://www.tw-animal.com/pet/171211/c000481.html',
+    'https://www.tw-animal.com/pet/171211/c000971.html',
+    'https://www.tw-animal.com/pet/171211/c000187.html',
+    'https://www.tw-animal.com/pet/171211/c001357.html',
+    'https://www.tw-animal.com/pet/171211/c001065.html',
+    'https://www.tw-animal.com/pet/171211/c000165.html',
+    'https://www.tw-animal.com/pet/171211/c000217.html',
+    'https://www.tw-animal.com/pet/171211/c000802.html',
+    'https://www.tw-animal.com/pet/171211/c001034.html',
+    'https://www.tw-animal.com/pet/171211/c001453.html'
 ]

 # 讓使用者輸入評分門檻
 min_rating = st.slider("請選擇想篩選的最低評分", 0.0, 5.0, 4.5, 0.1)

+# 當使用者按下「開始爬蟲」按鈕時開始抓取資料
 if st.button('開始爬蟲'):
-    # 建立空的列表來儲存每一頁的資料
     all_data = []
-
-    # 顯示進度條
     progress_bar = st.progress(0)
     status_text = st.empty()

-    # 遍歷每個網址
+    # 遍歷每個網址並抓取資料
     for i, url in enumerate(urls):
-        # 更新進度條和狀態文字
         progress = int((i + 1) / len(urls) * 100)
         progress_bar.progress(progress)
         status_text.text(f'正在處理第 {i+1} 個網址,共 {len(urls)} 個')

-        # 發送HTTP請求獲取頁面內容
         response = requests.get(url)
         response.encoding = 'utf-8'
-
-        # 使用BeautifulSoup解析頁面
         soup = BeautifulSoup(response.text, 'html.parser')

-        # 抓取標題、手機、地址和評分
         title = soup.find('h1', class_='t-intro__title').get_text(strip=True)
         phone = soup.find('a', href=lambda href: href and href.startswith('tel:')).get_text(strip=True)
         address = soup.find('a', class_='t-font-medium').get_text(strip=True)
         rating = float(soup.find('span', class_='t-intro__recommand').get_text(strip=True))
+
+        # 提取地址中的區名
+        district = address.split('區')[0] + '區' if '區' in address else '其他'

-        # 將評分大於或等於使用者輸入的資料存入列表
+        # 只將符合評分條件的資料加入列表
         if rating >= min_rating:
             all_data.append({
                 '標題': title,
                 '手機': phone,
                 '地址': address,
-                '評分': rating
+                '評分': rating,
+                '區': district
             })

-    # 將所有符合條件的資料轉換為DataFrame
+    # 轉換為 DataFrame
     df = pd.DataFrame(all_data)

-    # 檢查是否有符合條件的資料
+    # 如果有符合條件的資料,顯示並繪圖
     if not df.empty:
-        # 輸出篩選後的DataFrame
         st.dataframe(df)
-
-        # 提供下載 CSV 檔案的選項
+
+        # 以每個區的診所數量繪製動態圖表
+        district_counts = df['區'].value_counts().reset_index()
+        district_counts.columns = ['區', '診所數量']
+
+        fig = px.bar(district_counts, x='區', y='診所數量', title='各區寵物診所數量')
+        st.plotly_chart(fig)
+
+        # 提供 CSV 下載功能
         csv = df.to_csv(index=False)
         st.download_button(
             label="下載 CSV 檔案",
@@ -95,19 +117,17 @@ if st.button('開始爬蟲'):
     else:
         st.write(f"沒有找到評分大於或等於 {min_rating} 的資料。")

-    # 清除進度條和狀態文字
+    # 清空進度條和狀態訊息
     progress_bar.empty()
     status_text.empty()

 # LINE Notify 部分
 st.header('傳送至 LINE Notify')
 token = st.text_input("請輸入 LINE Notify 權杖")
-
 if st.button('傳送至 LINE'):
     if 'df' in locals() and not df.empty:
         msg = df.to_string(index=False)

-        # 傳送 LINE Notify 訊息
         def send_line_notify(token, msg):
             headers = {
                 "Authorization": "Bearer " + token,
@@ -119,11 +139,10 @@ if st.button('傳送至 LINE'):
             r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
             return r.status_code

-        # 呼叫傳送 LINE Notify 函數
         status_code = send_line_notify(token, msg)
         if status_code == 200:
             st.success('成功傳送至 LINE Notify!')
         else:
             st.error('傳送失敗,請檢查您的權杖是否正確。')
     else:
-        st.warning('沒有資料可以傳送,請先執行爬蟲。')
+        st.warning('沒有資料可以傳送,請先執行爬蟲。')
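
The hunks above skip the first lines of set_background's body (old lines 11 to 13), so the st.markdown call that injects the CSS is implied rather than shown. A minimal sketch of the complete helper, assuming those hidden lines are a plain st.markdown(f"""...""", unsafe_allow_html=True) wrapper, and including the background-position and padding-top rules added by this commit:

    import base64
    import streamlit as st

    def set_background(image_file):
        # Read the image and inline it as a base64 data URI in page-level CSS.
        with open(image_file, "rb") as f:
            encoded_string = base64.b64encode(f.read()).decode()
        # Assumed wrapper: the diff only shows the CSS body and unsafe_allow_html=True.
        st.markdown(
            f"""
            <style>
            .stApp {{
                background-image: url(data:image/png;base64,{encoded_string});
                background-size: cover;
                background-position: center top;
                padding-top: 100px;
            }}
            </style>
            """,
            unsafe_allow_html=True
        )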
 
 
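
A note on the scraping loop: requests.get is called without a timeout and every soup.find result is dereferenced immediately, so one slow page or one missing element aborts the whole crawl with an exception. A minimal defensive sketch, reusing the selectors from the committed code (the scrape_clinic helper name and the skip-on-failure behaviour are illustrative, not part of the commit):

    import requests
    from bs4 import BeautifulSoup

    def scrape_clinic(url):
        """Fetch one clinic page; return its fields, or None if anything is missing."""
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            response.encoding = 'utf-8'
            soup = BeautifulSoup(response.text, 'html.parser')
            title = soup.find('h1', class_='t-intro__title')
            phone = soup.find('a', href=lambda href: href and href.startswith('tel:'))
            address = soup.find('a', class_='t-font-medium')
            rating = soup.find('span', class_='t-intro__recommand')
            if not all([title, phone, address, rating]):
                return None
            return {
                '標題': title.get_text(strip=True),
                '手機': phone.get_text(strip=True),
                '地址': address.get_text(strip=True),
                '評分': float(rating.get_text(strip=True)),
            }
        except (requests.RequestException, ValueError):
            # Network failure or a non-numeric rating: skip this page.
            return None

Each loop iteration could then append scrape_clinic(url) to all_data only when the result is not None, instead of letting a single bad page stop the run.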
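
The new district column is built with address.split('區')[0] + '區', which keeps everything up to the first 區, so the stored value still carries the city prefix (for example 台中市北區 rather than 北區). If only the district name itself is wanted for the bar chart, a hypothetical helper along these lines could be swapped in:

    import re

    def extract_district(address):
        # Capture the district that follows a city/county name (e.g. '北區', '西屯區');
        # fall back to '其他' when no such pattern is present.
        match = re.search(r'[市縣](\w{1,3}?區)', address)
        return match.group(1) if match else '其他'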
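
One behavioural caveat the commit does not change: Streamlit reruns the whole script on every widget interaction, so when 傳送至 LINE is clicked the scraping block is skipped, df is never defined in that run, and the 'df' in locals() check fails even right after a successful crawl. A minimal sketch of persisting the results in st.session_state instead (the clinics_df key is illustrative):

    import pandas as pd
    import streamlit as st

    if st.button('開始爬蟲'):
        all_data = []
        # ... scraping loop as in the diff, appending one dict per clinic ...
        st.session_state['clinics_df'] = pd.DataFrame(all_data)

    if st.button('傳送至 LINE'):
        df = st.session_state.get('clinics_df')
        if df is not None and not df.empty:
            msg = df.to_string(index=False)
            # ... build and send the LINE Notify request as in the diff ...
        else:
            st.warning('沒有資料可以傳送,請先執行爬蟲。')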
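
Finally, the commit introduces import plotly.express as px; assuming the Space declares its dependencies in a requirements.txt next to app.py, plotly would need to be listed there alongside the packages the script already imports, roughly:

    streamlit
    requests
    beautifulsoup4
    pandas
    plotly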