CSB261 committed on
Commit
a5ab553
•
1 Parent(s): 2f5b5b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -38
app.py CHANGED
@@ -3,50 +3,57 @@ import pandas as pd
3
  import re
4
  from collections import Counter
5
  from openpyxl import Workbook
 
6
 
7
  def process_excel(file):
8
  # 엑셀 파일 읽기
9
- df = pd.read_excel(file, sheet_name=0)
10
 
11
- # D4부터 D열의 데이터 가져오기
12
- product_names = df.iloc[3:, 3].dropna() # D4부터 데이터 추출
13
-
14
- # 키워드 추출 및 정제
15
- all_keywords = []
16
- for name in product_names:
17
- # 특수 문자 제거 및 키워드 추출
18
- keywords = re.findall(r'\b\w+\b', name)
19
- # 중복 제거
20
- unique_keywords = set(keywords)
21
- all_keywords.extend(unique_keywords)
22
-
23
- # 빈도 계산
24
- keyword_counts = Counter(all_keywords)
25
- sorted_keywords = keyword_counts.most_common()
26
-
27
- # 결과를 엑셀로 저장
28
- wb = Workbook()
29
- ws = wb.active
30
- ws.title = "Keywords"
31
- ws['A4'] = "키워드"
32
- ws['B4'] = "빈도"
33
-
34
- for idx, (keyword, count) in enumerate(sorted_keywords, start=5):
35
- ws[f'A{idx}'] = keyword
36
- ws[f'B{idx}'] = count
37
-
38
- result_file = "keyword_counts.xlsx"
39
- wb.save(result_file)
40
-
41
- return result_file
 
 
 
 
 
 
 
42
 
43
- # Gradio 인터페이스 설정
44
- iface = gr.Interface(
45
  fn=process_excel,
46
- inputs=gr.File(label="엑셀 파일을 업로드하세요"),
47
- outputs=gr.File(label="결과 엑셀 파일"),
48
- title="엑셀 키워드 분석기"
49
  )
50
 
51
  if __name__ == "__main__":
52
- iface.launch()
 
3
  import re
4
  from collections import Counter
5
  from openpyxl import Workbook
6
+ from openpyxl.chart import BarChart, Reference
7
 
8
  def process_excel(file):
9
  # 엑셀 파일 읽기
10
+ df = pd.read_excel(file)
11
 
12
+ # D열(D4부터)의 상품명 데이터 가져오기
13
+ data = df.iloc[3:, 3].dropna().astype(str) # D4부터 D열 전체 가져오기 (4번째 행부터 시작)
14
+
15
+ # 키워드 추출 및 처리
16
+ keyword_list = []
17
+ for item in data:
18
+ keywords = re.findall(r'\b\w+\b', item) # 특수문자 제거하고 키워드 추출
19
+ keywords = list(set(keywords)) # 중복 제거
20
+ keyword_list.extend(keywords)
21
+
22
+ # 키워드 빈도수 계산
23
+ keyword_count = Counter(keyword_list)
24
+
25
+ # 결과를 데이터프레임으로 변환
26
+ result_df = pd.DataFrame(keyword_count.items(), columns=['키워드', '빈도']).sort_values(by='빈도', ascending=False).reset_index(drop=True)
27
+
28
+ # A4와 B4 셀부터 데이터가 들어가도록 수정
29
+ with pd.ExcelWriter('keyword_result.xlsx', engine='openpyxl') as writer:
30
+ result_df.to_excel(writer, index=False, startrow=3, startcol=0) # A4 셀에 해당하는 3번째 행, 0번째 열부터 시작
31
+
32
+ # 워크북 및 시트 가져오기
33
+ workbook = writer.book
34
+ sheet = writer.sheets['Sheet1']
35
+
36
+ # 차트 생성
37
+ chart = BarChart()
38
+ data = Reference(sheet, min_col=2, min_row=4, max_row=3 + len(result_df), max_col=2)
39
+ categories = Reference(sheet, min_col=1, min_row=4, max_row=3 + len(result_df))
40
+ chart.add_data(data, titles_from_data=True)
41
+ chart.set_categories(categories)
42
+ chart.title = "키워드 빈도수"
43
+ chart.x_axis.title = "키워드"
44
+ chart.y_axis.title = "빈도"
45
+
46
+ # 차트를 시트에 추가
47
+ sheet.add_chart(chart, "E4") # E4 셀에 차트를 추가
48
+
49
+ return 'keyword_result.xlsx'
50
 
51
+ # Gradio 인터페이스 생성
52
+ interface = gr.Interface(
53
  fn=process_excel,
54
+ inputs=gr.File(label="엑셀 파일 업로드"),
55
+ outputs=gr.File(label="분석 결과 파일")
 
56
  )
57
 
58
  if __name__ == "__main__":
59
+ interface.launch()