add bart something
Browse files
ebart.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import spaces
|
2 |
+
import torch
|
3 |
+
from transformers import PegasusForConditionalGeneration
|
4 |
+
# Download tokenizers_pegasus.py and the other Python scripts from Fengshenbang-LM
|
5 |
+
from tokenizers_pegasus import PegasusTokenizer
|
6 |
+
|
7 |
+
@spaces.GPU
def generate_summary(text, max_length=180, min_length=64):
    """Summarize ``text`` with the Randeng-Pegasus Chinese summary model.

    The decoded summaries are printed (as before) and also returned so that
    callers can use the result programmatically.

    Args:
        text: The passage to summarize.
        max_length: Maximum length passed to ``model.generate``.
        min_length: Minimum length passed to ``model.generate``.

    Returns:
        The list of decoded summary strings produced by ``batch_decode``.

    Raises:
        ValueError: If ``min_length`` is greater than ``max_length``.
    """
    if min_length > max_length:
        raise ValueError(
            f"min_length ({min_length}) must not exceed max_length ({max_length})"
        )

    model_name = "IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese"

    # Load the tokenizer and the model.
    model = PegasusForConditionalGeneration.from_pretrained(model_name)
    tokenizer = PegasusTokenizer.from_pretrained(model_name)

    # Move the model to the GPU when one is available; fall back to the CPU
    # so the function still works on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenize. ``truncation=True`` is required for ``max_length`` to actually
    # cap the input; without it over-long inputs are passed through unchanged.
    inputs = tokenizer(
        text, max_length=1024, truncation=True, return_tensors="pt"
    )
    input_ids = inputs["input_ids"].to(device)

    # Generate the summary, honouring the caller's length bounds (these were
    # previously accepted by the signature but never forwarded).
    summary_ids = model.generate(
        input_ids,
        max_length=max_length,
        min_length=min_length,
    )
    clean_summary = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    print(clean_summary)
    return clean_summary
|
27 |
+
|
28 |
+
|
29 |
+
if __name__ == "__main__":
    # Sample complaint text used as a smoke test for the summarizer. The
    # segments are joined without separators into one passage.
    text = "".join(
        [
            "2023年3月16日我们从黑龙江大庆来到湖南长沙市长沙县福中路77号湖南省富达日化有限公司,",
            "其宣传的特殊配方洗衣液比立白和蓝月亮的去污效果要好很多,还有油污净自称中国去污第一名,",
            "做了一些去除废机油的实验,油污净清洗废机油,洗洗液去除废机油,洗完后直接排入城市管网的下水池,",
            "每天都在进行相关测试,废机油属于危险废物,严重危害公共环境,请湖南环保局对其污染环境进行查处。",
            "其公司宣传材料存在大量虚假宣传,夸大其词,感觉就是个传销组织,说其公司有妆字号资质,药字号资质,",
            "全国工业产品餐具用洗涤剂资质,声称其设备是纯净水设备,是否有涉水批件,是否有消字号证件,",
            "其消字号所有产品是否都进行备案和匹配的检测报告,请湖南市场监督管理局对其进行查处,",
            "1997年到现在坑害全国百姓加盟其公司,请湖南商务局查处其是否具有特许经营资质,自称每年营业额1亿元多元,",
            "从97年坑害到23年大量的客户没有开局相应的发票,存在重大偷税漏税嫌疑,请湖南税务机关对其进行查处!",
            "还有其出口的设备,渠道是否正规,是白关,灰关,还是黑关,请湖南海关相关部门对其进行查处。",
        ]
    )
    generate_summary(text, max_length=128, min_length=64)
|