Tuchuanhuhuhu committed on
Commit
c5079c2
·
1 Parent(s): 3e8d848

bugfix: 修复了读取的 excel 文件可能不完整的问题

Browse files
Files changed (1) hide show
  1. modules/train_func.py +18 -7
modules/train_func.py CHANGED
@@ -6,20 +6,31 @@ import openai
6
  import gradio as gr
7
  import ujson as json
8
  import commentjson
 
9
 
10
  import modules.presets as presets
11
  from modules.utils import get_file_hash, count_token
12
  from modules.presets import i18n
13
 
14
  def excel_to_jsonl(filepath, preview=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  jsonl = []
16
- with open(filepath, "rb") as f:
17
- import pandas as pd
18
- df = pd.read_excel(f)
19
- for row in df.iterrows():
20
- jsonl.append(row[1].to_dict())
21
- if preview:
22
- break
23
  formatted_jsonl = []
24
  for i in jsonl:
25
  if "提问" in i and "答案" in i:
 
6
  import gradio as gr
7
  import ujson as json
8
  import commentjson
9
+ import openpyxl
10
 
11
  import modules.presets as presets
12
  from modules.utils import get_file_hash, count_token
13
  from modules.presets import i18n
14
 
15
  def excel_to_jsonl(filepath, preview=False):
16
+ # 打开Excel文件
17
+ workbook = openpyxl.load_workbook(filepath)
18
+
19
+ # 获取第一个工作表
20
+ sheet = workbook.active
21
+
22
+ # 获取所有行数据
23
+ data = []
24
+ for row in sheet.iter_rows(values_only=True):
25
+ data.append(row)
26
+
27
+ # 构建字典列表
28
+ headers = data[0]
29
  jsonl = []
30
+ for row in data[1:]:
31
+ row_data = dict(zip(headers, row))
32
+ if any(row_data.values()):
33
+ jsonl.append(row_data)
 
 
 
34
  formatted_jsonl = []
35
  for i in jsonl:
36
  if "提问" in i and "答案" in i: