Spaces:
Sleeping
Sleeping
Tuchuanhuhuhu
committed on
Commit
·
c5079c2
1
Parent(s):
3e8d848
bugfix: 修复了读取的 excel 文件可能不完整的问题
Browse files - modules/train_func.py +18 -7
modules/train_func.py
CHANGED
@@ -6,20 +6,31 @@ import openai
|
|
6 |
import gradio as gr
|
7 |
import ujson as json
|
8 |
import commentjson
|
|
|
9 |
|
10 |
import modules.presets as presets
|
11 |
from modules.utils import get_file_hash, count_token
|
12 |
from modules.presets import i18n
|
13 |
|
14 |
def excel_to_jsonl(filepath, preview=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
jsonl = []
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
jsonl.append(row[1].to_dict())
|
21 |
-
if preview:
|
22 |
-
break
|
23 |
formatted_jsonl = []
|
24 |
for i in jsonl:
|
25 |
if "提问" in i and "答案" in i:
|
|
|
6 |
import gradio as gr
|
7 |
import ujson as json
|
8 |
import commentjson
|
9 |
+
import openpyxl
|
10 |
|
11 |
import modules.presets as presets
|
12 |
from modules.utils import get_file_hash, count_token
|
13 |
from modules.presets import i18n
|
14 |
|
15 |
def excel_to_jsonl(filepath, preview=False):
|
16 |
+
# 打开Excel文件
|
17 |
+
workbook = openpyxl.load_workbook(filepath)
|
18 |
+
|
19 |
+
# 获取第一个工作表
|
20 |
+
sheet = workbook.active
|
21 |
+
|
22 |
+
# 获取所有行数据
|
23 |
+
data = []
|
24 |
+
for row in sheet.iter_rows(values_only=True):
|
25 |
+
data.append(row)
|
26 |
+
|
27 |
+
# 构建字典列表
|
28 |
+
headers = data[0]
|
29 |
jsonl = []
|
30 |
+
for row in data[1:]:
|
31 |
+
row_data = dict(zip(headers, row))
|
32 |
+
if any(row_data.values()):
|
33 |
+
jsonl.append(row_data)
|
|
|
|
|
|
|
34 |
formatted_jsonl = []
|
35 |
for i in jsonl:
|
36 |
if "提问" in i and "答案" in i:
|