wangrongsheng committed on
Commit
e55d43b
·
1 Parent(s): 1328244

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +108 -117
  2. optimizeOpenAI.py +226 -0
app.py CHANGED
@@ -13,7 +13,8 @@ import gradio
13
  import markdown
14
  import json
15
  import tiktoken
16
-
 
17
  def parse_text(text):
18
  lines = text.split("\n")
19
  for i,line in enumerate(lines):
@@ -30,26 +31,47 @@ def parse_text(text):
30
  lines[i] = '<br/>'+line.replace(" ", "&nbsp;")
31
  return "".join(lines)
32
 
33
- def get_response(system, context, myKey, raw = False):
34
- openai.api_key = myKey
35
- response = openai.ChatCompletion.create(
36
- model="gpt-3.5-turbo",
37
- messages=[system, *context],
38
- )
39
- openai.api_key = ""
40
- if raw:
41
- return response
42
- else:
43
- message = response["choices"][0]["message"]["content"]
44
- message_with_stats = f'{message}'
45
- return message, parse_text(message_with_stats)
46
 
47
- def valid_apikey(api_key):
 
 
48
  try:
49
- get_response({"role": "system", "content": "You are a helpful assistant."}, [{"role": "user", "content": "test"}], api_key)
50
- return "可用的api-key"
 
 
 
51
  except:
52
- return "无效的api-key"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  class Paper:
55
  def __init__(self, path, title='', url='', abs='', authers=[], sl=[]):
@@ -303,8 +325,9 @@ class Reader:
303
  def __init__(self, key_word='', query='', filter_keys='',
304
  root_path='./',
305
  gitee_key='',
306
- sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', key='', model_name="gpt-3.5-turbo", p=1.0, temperature=1.0):
307
- self.key = str(key) # OpenAI key
 
308
  self.user_name = user_name # 读者姓名
309
  self.key_word = key_word # 读者感兴趣的关键词
310
  self.query = query # 读者输入的搜索查询
@@ -435,7 +458,7 @@ class Reader:
435
 
436
  return image_url
437
 
438
- def summary_with_chat(self, paper_list, key, model_name, p, temperature):
439
  htmls = []
440
  utoken = 0
441
  ctoken = 0
@@ -451,7 +474,7 @@ class Reader:
451
  text += list(paper.section_text_dict.values())[0]
452
  #max_token = 2500 * 4
453
  #text = text[:max_token]
454
- chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
455
  htmls.append(chat_summary_text)
456
 
457
  # TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
@@ -469,7 +492,7 @@ class Reader:
469
  # methods
470
  method_text += paper.section_text_dict[method_key]
471
  text = summary_text + "\n<Methods>:\n" + method_text
472
- chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
473
  htmls.append(chat_method_text)
474
  else:
475
  chat_method_text = ''
@@ -492,7 +515,7 @@ class Reader:
492
  text = summary_text + "\n <Conclusion>:\n" + conclusion_text
493
  else:
494
  text = summary_text
495
- chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
496
  htmls.append(chat_conclusion_text)
497
  htmls.append("\n")
498
  # token统计
@@ -507,24 +530,20 @@ class Reader:
507
  "cost": str(cost),
508
  }
509
  md_text = "\n".join(htmls)
510
-
511
  return markdown.markdown(md_text), pos_count
512
 
513
 
514
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
515
  stop=tenacity.stop_after_attempt(5),
516
  reraise=True)
517
- def chat_conclusion(self, text, key, model_name, p, temperature):
518
- openai.api_key = key
519
  conclusion_prompt_token = 650
520
  text_token = len(self.encoding.encode(text))
521
  clip_text_index = int(len(text)*(self.max_token_num-conclusion_prompt_token)/text_token)
522
- clip_text = text[:clip_text_index]
523
-
524
- messages=[
525
- {"role": "system", "content": "You are a reviewer in the field of ["+self.key_word+"] and you need to critically review this article"}, # chatgpt 角色
526
- {"role": "assistant", "content": "This is the <summary> and <conclusion> part of an English literature, where <summary> you have already summarized, but <conclusion> part, I need your help to summarize the following questions:"+clip_text}, # 背景知识,可以参考OpenReview的审稿流程
527
- {"role": "user", "content": """
528
  8. Make the following summary.Be sure to use Chinese answers (proper nouns need to be marked in English).
529
  - (1):What is the significance of this piece of work?
530
  - (2):Summarize the strengths and weaknesses of this article in three dimensions: innovation point, performance, and workload.
@@ -535,42 +554,26 @@ class Reader:
535
  - (2):Innovation point: xxx; Performance: xxx; Workload: xxx;\n
536
 
537
  Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
538
- """},
539
- ]
540
- response = openai.ChatCompletion.create(
541
- model=model_name,
542
- # prompt需要用英语替换,少占用token。
543
- messages=messages,
544
- temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
545
- top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
546
  )
547
-
548
- result = ''
549
- for choice in response.choices:
550
- result += choice.message.content
551
- #print("prompt_token_used:", response.usage.prompt_tokens,
552
- # "completion_token_used:", response.usage.completion_tokens,
553
- # "total_token_used:", response.usage.total_tokens)
554
- #print("response_time:", response.response_ms/1000.0, 's')
555
- usage_token = response.usage.prompt_tokens
556
- com_token = response.usage.completion_tokens
557
- total_token = response.usage.total_tokens
558
-
559
- return result, usage_token, com_token, total_token
560
 
561
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
562
  stop=tenacity.stop_after_attempt(5),
563
  reraise=True)
564
- def chat_method(self, text, key, model_name, p, temperature):
565
- openai.api_key = key
566
  method_prompt_token = 650
567
  text_token = len(self.encoding.encode(text))
568
  clip_text_index = int(len(text)*(self.max_token_num-method_prompt_token)/text_token)
569
- clip_text = text[:clip_text_index]
570
- messages=[
571
- {"role": "system", "content": "You are a researcher in the field of ["+self.key_word+"] who is good at summarizing papers using concise statements"}, # chatgpt 角色
572
- {"role": "assistant", "content": "This is the <summary> and <Method> part of an English document, where <summary> you have summarized, but the <Methods> part, I need your help to read and summarize the following questions."+clip_text}, # 背景知识
573
- {"role": "user", "content": """
574
  7. Describe in detail the methodological idea of this article. Be sure to use Chinese answers (proper nouns need to be marked in English). For example, its steps are.
575
  - (1):...
576
  - (2):...
@@ -584,42 +587,26 @@ class Reader:
584
  ....... \n\n
585
 
586
  Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
587
- """},
588
- ]
589
- response = openai.ChatCompletion.create(
590
- model=model_name,
591
- messages=messages,
592
- temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
593
- top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
594
  )
595
-
596
- result = ''
597
- for choice in response.choices:
598
- result += choice.message.content
599
- print("method_result:\n", result)
600
- #print("prompt_token_used:", response.usage.prompt_tokens,
601
- # "completion_token_used:", response.usage.completion_tokens,
602
- # "total_token_used:", response.usage.total_tokens)
603
- #print("response_time:", response.response_ms/1000.0, 's')
604
- usage_token = response.usage.prompt_tokens
605
- com_token = response.usage.completion_tokens
606
- total_token = response.usage.total_tokens
607
-
608
- return result, usage_token, com_token, total_token
609
 
610
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
611
  stop=tenacity.stop_after_attempt(5),
612
  reraise=True)
613
- def chat_summary(self, text, key, model_name, p, temperature):
614
- openai.api_key = key
615
  summary_prompt_token = 1000
616
  text_token = len(self.encoding.encode(text))
617
  clip_text_index = int(len(text)*(self.max_token_num-summary_prompt_token)/text_token)
618
  clip_text = text[:clip_text_index]
619
- messages=[
620
- {"role": "system", "content": "You are a researcher in the field of ["+self.key_word+"] who is good at summarizing papers using concise statements"},
621
- {"role": "assistant", "content": "This is the title, author, link, abstract and introduction of an English document. I need your help to read and summarize the following questions: "+clip_text},
622
- {"role": "user", "content": """
623
  1. Mark the title of the paper (with Chinese translation)
624
  2. list all the authors' names (use English)
625
  3. mark the first author's affiliation (output Chinese translation only)
@@ -643,29 +630,14 @@ class Reader:
643
  - (4):xxx.\n\n
644
 
645
  Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not have too much repetitive information, numerical values using the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed.
646
- """},
647
- ]
648
-
649
- response = openai.ChatCompletion.create(
650
- model=model_name,
651
- messages=messages,
652
- temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
653
- top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
654
  )
655
-
656
- result = ''
657
- for choice in response.choices:
658
- result += choice.message.content
659
- print("summary_result:\n", result)
660
- #print("prompt_token_used:", response.usage.prompt_tokens,
661
- # "completion_token_used:", response.usage.completion_tokens,
662
- # "total_token_used:", response.usage.total_tokens)
663
- #print("response_time:", response.response_ms/1000.0, 's')
664
- usage_token = response.usage.prompt_tokens
665
- com_token = response.usage.completion_tokens
666
- total_token = response.usage.total_tokens
667
-
668
- return result, usage_token, com_token, total_token
669
 
670
  def export_to_markdown(self, text, file_name, mode='w'):
671
  # 使用markdown模块的convert方法,将文本转换为html格式
@@ -681,10 +653,16 @@ class Reader:
681
  print(f"Query: {self.query}")
682
  print(f"Sort: {self.sort}")
683
 
684
- def upload_pdf(key, text, model_name, p, temperature, file):
685
  # 检查两个输入都不为空
686
- if not key or not text or not file:
 
 
 
 
 
687
  return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
 
688
  # 判断PDF文件
689
  #if file and file.name.split(".")[-1].lower() != "pdf":
690
  # return '请勿上传非 PDF 文件!'
@@ -692,22 +670,29 @@ def upload_pdf(key, text, model_name, p, temperature, file):
692
  section_list = text.split(',')
693
  paper_list = [Paper(path=file, sl=section_list)]
694
  # 创建一个Reader对象
695
- reader = Reader()
696
- sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, model_name=model_name, p=p, temperature=temperature)
 
697
  return cost, sum_info
698
 
699
  api_title = "api-key可用验证"
700
  api_description = '''<div align='left'>
 
701
  <img src='https://visitor-badge.laobi.icu/badge?page_id=https://huggingface.co/spaces/wangrongsheng/ChatPaper'>
 
702
  <img align='right' src='https://i.328888.xyz/2023/03/12/vH9dU.png' width="150">
 
703
  Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github.com/kaixindelele/ChatPaper) .
 
704
  💗如果您觉得我们的项目对您有帮助,还请您给我们一些鼓励!💗
 
705
  🔴请注意:千万不要用于严肃的学术场景,只能用于论文阅读前的初筛!
 
706
  </div>
707
  '''
708
 
709
  api_input = [
710
- gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password')
711
  ]
712
  api_gui = gradio.Interface(fn=valid_apikey, inputs=api_input, outputs="text", title=api_title, description=api_description)
713
 
@@ -715,20 +700,26 @@ api_gui = gradio.Interface(fn=valid_apikey, inputs=api_input, outputs="text", ti
715
  title = "ChatPaper"
716
  # 描述
717
  description = '''<div align='left'>
 
718
  <img src='https://visitor-badge.laobi.icu/badge?page_id=https://huggingface.co/spaces/wangrongsheng/ChatPaper'>
 
719
  <img align='right' src='https://i.328888.xyz/2023/03/12/vH9dU.png' width="150">
 
720
  Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github.com/kaixindelele/ChatPaper) .
 
721
  💗如果您觉得我们的项目对您有帮助,还请您给我们一些鼓励!💗
 
722
  🔴请注意:千万不要用于严肃的学术场景,只能用于论文阅读前的初筛!
 
723
  </div>
724
  '''
725
  # 创建Gradio界面
726
  ip = [
727
- gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
728
  gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
729
  gradio.inputs.Radio(choices=["gpt-3.5-turbo", "gpt-3.5-turbo-0301"], default="gpt-3.5-turbo", label="Select model"),
730
  gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
731
- gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
732
  gradio.inputs.File(label="请上传论文PDF(必填)")
733
  ]
734
 
 
13
  import markdown
14
  import json
15
  import tiktoken
16
+ import concurrent.futures
17
+ from optimizeOpenAI import chatPaper
18
  def parse_text(text):
19
  lines = text.split("\n")
20
  for i,line in enumerate(lines):
 
31
  lines[i] = '<br/>'+line.replace(" ", "&nbsp;")
32
  return "".join(lines)
33
 
34
+ # def get_response(system, context, myKey, raw = False):
35
+ # openai.api_key = myKey
36
+ # response = openai.ChatCompletion.create(
37
+ # model="gpt-3.5-turbo",
38
+ # messages=[system, *context],
39
+ # )
40
+ # openai.api_key = ""
41
+ # if raw:
42
+ # return response
43
+ # else:
44
+ # message = response["choices"][0]["message"]["content"]
45
+ # message_with_stats = f'{message}'
46
+ # return message, parse_text(message_with_stats)
47
 
48
+ valid_api_keys = []
49
+
50
def api_key_check(api_key):
    """Return ``api_key`` if a probe request made with it succeeds, else ``None``.

    The check is best-effort: any error while building the client or
    performing the probe request is treated as "key not usable".
    """
    try:
        chat = chatPaper([api_key])
        if chat.check_api_available():
            return api_key
    except Exception as err:
        # Only the exception type is logged so the key itself can never end
        # up in the output.
        print("api-key check failed:", type(err).__name__)
    return None
59
+
60
def valid_apikey(api_keys):
    """Validate a comma-separated string of API keys and report the result.

    Spaces are removed, empty segments are dropped and duplicate keys are
    checked only once. Every key is probed concurrently with
    ``api_key_check``. Keys that pass are added to the module-level
    ``valid_api_keys`` list (without duplicates).

    Args:
        api_keys: Comma-separated API keys as typed by the user.

    Returns:
        A user-facing status message describing the keys that were found
        valid in *this* call, or ``"无效的api-key"`` when none of them is.
    """
    # dict.fromkeys de-duplicates while keeping the order the user typed.
    candidates = list(dict.fromkeys(
        key for key in api_keys.replace(' ', '').split(',') if key
    ))
    if not candidates:
        return "无效的api-key"

    # Never print the keys themselves: they are secrets.
    print("checking {} api-key(s)".format(len(candidates)))

    global valid_api_keys
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # map() yields results in input order, so the report is deterministic.
        checked = list(executor.map(api_key_check, candidates))
    usable = [key for key in checked if key]

    for key in usable:
        if key not in valid_api_keys:
            valid_api_keys.append(key)

    if usable:
        return "有效的api-key一共有{}个,分别是:{}, 现在可以提交你的paper".format(len(usable), usable)
    return "无效的api-key"
74
+
75
 
76
  class Paper:
77
  def __init__(self, path, title='', url='', abs='', authers=[], sl=[]):
 
325
  def __init__(self, key_word='', query='', filter_keys='',
326
  root_path='./',
327
  gitee_key='',
328
+ sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', api_keys:list = [], model_name="gpt-3.5-turbo", p=1.0, temperature=1.0):
329
+ self.api_keys = api_keys
330
+ self.chatPaper = chatPaper( api_keys = self.api_keys, apiTimeInterval=10 , temperature=temperature,top_p=p,model_name=model_name) #openAI api封装
331
  self.user_name = user_name # 读者姓名
332
  self.key_word = key_word # 读者感兴趣的关键词
333
  self.query = query # 读者输入的搜索查询
 
458
 
459
  return image_url
460
 
461
+ def summary_with_chat(self, paper_list):
462
  htmls = []
463
  utoken = 0
464
  ctoken = 0
 
474
  text += list(paper.section_text_dict.values())[0]
475
  #max_token = 2500 * 4
476
  #text = text[:max_token]
477
+ chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text)
478
  htmls.append(chat_summary_text)
479
 
480
  # TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
 
492
  # methods
493
  method_text += paper.section_text_dict[method_key]
494
  text = summary_text + "\n<Methods>:\n" + method_text
495
+ chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text)
496
  htmls.append(chat_method_text)
497
  else:
498
  chat_method_text = ''
 
515
  text = summary_text + "\n <Conclusion>:\n" + conclusion_text
516
  else:
517
  text = summary_text
518
+ chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text)
519
  htmls.append(chat_conclusion_text)
520
  htmls.append("\n")
521
  # token统计
 
530
  "cost": str(cost),
531
  }
532
  md_text = "\n".join(htmls)
 
533
  return markdown.markdown(md_text), pos_count
534
 
535
 
536
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
537
  stop=tenacity.stop_after_attempt(5),
538
  reraise=True)
539
+ def chat_conclusion(self, text):
 
540
  conclusion_prompt_token = 650
541
  text_token = len(self.encoding.encode(text))
542
  clip_text_index = int(len(text)*(self.max_token_num-conclusion_prompt_token)/text_token)
543
+ clip_text = text[:clip_text_index]
544
+ self.chatPaper.reset(convo_id="chatConclusion",system_prompt="You are a reviewer in the field of ["+self.key_word+"] and you need to critically review this article")
545
+ self.chatPaper.add_to_conversation(convo_id="chatConclusion", role="assistant", message="This is the <summary> and <conclusion> part of an English literature, where <summary> you have already summarized, but <conclusion> part, I need your help to summarize the following questions:"+clip_text)# 背景知识,可以参考OpenReview的审稿流程
546
+ content = """
 
 
547
  8. Make the following summary.Be sure to use Chinese answers (proper nouns need to be marked in English).
548
  - (1):What is the significance of this piece of work?
549
  - (2):Summarize the strengths and weaknesses of this article in three dimensions: innovation point, performance, and workload.
 
554
  - (2):Innovation point: xxx; Performance: xxx; Workload: xxx;\n
555
 
556
  Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
557
+ """
558
+ result = self.chatPaper.ask(
559
+ prompt = content,
560
+ role="user",
561
+ convo_id="chatConclusion",
 
 
 
562
  )
563
+ print(result)
564
+ return result[0], result[1], result[2], result[3]
 
 
 
 
 
 
 
 
 
 
 
565
 
566
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
567
  stop=tenacity.stop_after_attempt(5),
568
  reraise=True)
569
+ def chat_method(self, text):
 
570
  method_prompt_token = 650
571
  text_token = len(self.encoding.encode(text))
572
  clip_text_index = int(len(text)*(self.max_token_num-method_prompt_token)/text_token)
573
+ clip_text = text[:clip_text_index]
574
+ self.chatPaper.reset(convo_id="chatMethod",system_prompt="You are a researcher in the field of ["+self.key_word+"] who is good at summarizing papers using concise statements")# chatgpt 角色
575
+ self.chatPaper.add_to_conversation(convo_id="chatMethod", role="assistant", message=str("This is the <summary> and <Method> part of an English document, where <summary> you have summarized, but the <Methods> part, I need your help to read and summarize the following questions."+clip_text))
576
+ content= """
 
577
  7. Describe in detail the methodological idea of this article. Be sure to use Chinese answers (proper nouns need to be marked in English). For example, its steps are.
578
  - (1):...
579
  - (2):...
 
587
  ....... \n\n
588
 
589
  Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
590
+ """
591
+ result = self.chatPaper.ask(
592
+ prompt = content,
593
+ role="user",
594
+ convo_id="chatMethod",
 
 
595
  )
596
+ print(result)
597
+ return result[0], result[1], result[2], result[3]
 
 
 
 
 
 
 
 
 
 
 
 
598
 
599
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
600
  stop=tenacity.stop_after_attempt(5),
601
  reraise=True)
602
+ def chat_summary(self, text):
 
603
  summary_prompt_token = 1000
604
  text_token = len(self.encoding.encode(text))
605
  clip_text_index = int(len(text)*(self.max_token_num-summary_prompt_token)/text_token)
606
  clip_text = text[:clip_text_index]
607
+ self.chatPaper.reset(convo_id="chatSummary",system_prompt="You are a researcher in the field of ["+self.key_word+"] who is good at summarizing papers using concise statements")
608
+ self.chatPaper.add_to_conversation(convo_id="chatSummary", role="assistant", message=str("This is the title, author, link, abstract and introduction of an English document. I need your help to read and summarize the following questions: "+clip_text))
609
+ content= """
 
610
  1. Mark the title of the paper (with Chinese translation)
611
  2. list all the authors' names (use English)
612
  3. mark the first author's affiliation (output Chinese translation only)
 
630
  - (4):xxx.\n\n
631
 
632
  Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not have too much repetitive information, numerical values using the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed.
633
+ """
634
+ result = self.chatPaper.ask(
635
+ prompt = content,
636
+ role="user",
637
+ convo_id="chatSummary",
 
 
 
638
  )
639
+ print(result)
640
+ return result[0], result[1], result[2], result[3]
 
 
 
 
 
 
 
 
 
 
 
 
641
 
642
  def export_to_markdown(self, text, file_name, mode='w'):
643
  # 使用markdown模块的convert方法,将文本转换为html格式
 
653
  print(f"Query: {self.query}")
654
  print(f"Sort: {self.sort}")
655
 
656
+ def upload_pdf(api_keys, text, model_name, p, temperature, file):
657
  # 检查两个输入都不为空
658
+ api_key_list = None
659
+ if api_keys:
660
+ api_key_list = api_keys.split(',')
661
+ elif not api_keys and valid_api_keys!=[]:
662
+ api_key_list = valid_api_keys
663
+ if not text or not file or not api_key_list:
664
  return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
665
+
666
  # 判断PDF文件
667
  #if file and file.name.split(".")[-1].lower() != "pdf":
668
  # return '请勿上传非 PDF 文件!'
 
670
  section_list = text.split(',')
671
  paper_list = [Paper(path=file, sl=section_list)]
672
  # 创建一个Reader对象
673
+ print(api_key_list)
674
+ reader = Reader(api_keys=api_key_list, model_name=model_name, p=p, temperature=temperature)
675
+ sum_info, cost = reader.summary_with_chat(paper_list=paper_list) # type: ignore
676
  return cost, sum_info
677
 
678
  api_title = "api-key可用验证"
679
  api_description = '''<div align='left'>
680
+
681
  <img src='https://visitor-badge.laobi.icu/badge?page_id=https://huggingface.co/spaces/wangrongsheng/ChatPaper'>
682
+
683
  <img align='right' src='https://i.328888.xyz/2023/03/12/vH9dU.png' width="150">
684
+
685
  Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github.com/kaixindelele/ChatPaper) .
686
+
687
  💗如果您觉得我们的项目对您有帮助,还请您给我们一些鼓励!💗
688
+
689
  🔴请注意:千万不要用于严肃的学术场景,只能用于论文阅读前的初筛!
690
+
691
  </div>
692
  '''
693
 
694
  api_input = [
695
+ gradio.inputs.Textbox(label="请输入你的API-key(必填, 多个API-key请用英文逗号隔开)", default="", type='password')
696
  ]
697
  api_gui = gradio.Interface(fn=valid_apikey, inputs=api_input, outputs="text", title=api_title, description=api_description)
698
 
 
700
  title = "ChatPaper"
701
  # 描述
702
  description = '''<div align='left'>
703
+
704
  <img src='https://visitor-badge.laobi.icu/badge?page_id=https://huggingface.co/spaces/wangrongsheng/ChatPaper'>
705
+
706
  <img align='right' src='https://i.328888.xyz/2023/03/12/vH9dU.png' width="150">
707
+
708
  Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github.com/kaixindelele/ChatPaper) .
709
+
710
  💗如果您觉得我们的项目对您有帮助,还请您给我们一些鼓励!💗
711
+
712
  🔴请注意:千万不要用于严肃的学术场景,只能用于论文阅读前的初筛!
713
+
714
  </div>
715
  '''
716
  # 创建Gradio界面
717
# Input widgets for the summarisation form. There are six of them, which
# presumably mirror upload_pdf(api_keys, text, model_name, p, temperature,
# file) positionally -- TODO confirm against the Interface that consumes `ip`.
# NOTE(review): the default section list below starts and ends with a literal
# single quote, so splitting it on ',' yields "'Abstract" and "References'".
# Left unchanged here; confirm whether Paper tolerates that.
ip = [
    gradio.inputs.Textbox(label="请输入你的API-key(必填, 多个API-key请用英文逗号隔开),不需要空格", default="", type='password'),
    gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
    gradio.inputs.Radio(choices=["gpt-3.5-turbo", "gpt-3.5-turbo-0301"], default="gpt-3.5-turbo", label="Select model"),
    gradio.inputs.Slider(minimum=0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
    # The chat-completions API only accepts a temperature between 0 and 2;
    # larger values are rejected, so the slider must not offer them.
    gradio.inputs.Slider(minimum=0, maximum=2.0, default=0.5, step=0.5, label="Temperature"),
    gradio.inputs.File(label="请上传论文PDF(必填)")
]
725
 
optimizeOpenAI.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ A simple wrapper for the official ChatGPT API
3
+ """
4
+ import json
5
+ import os
6
+ import threading
7
+ import time
8
+ import requests
9
+ import tiktoken
10
+ from typing import Generator
11
+ from queue import PriorityQueue as PQ
12
+ import json
13
+ import os
14
+ import time
15
+ ENCODER = tiktoken.get_encoding("gpt2")
16
class chatPaper:
    """Small client for the OpenAI chat-completions HTTP endpoint.

    The client keeps one message history per ``convo_id`` and rotates
    through a pool of API keys, waiting so that a key is handed out again
    no sooner than ``apiTimeInterval`` seconds after its previous use.
    """

    def __init__(
        self,
        api_keys: list,
        proxy=None,
        api_proxy=None,
        max_tokens: int = 4000,
        temperature: float = 0.5,
        top_p: float = 1.0,
        model_name: str = "gpt-3.5-turbo",
        reply_count: int = 1,
        system_prompt="You are ChatPaper, A paper reading bot",
        lastAPICallTime=None,
        apiTimeInterval=20,
    ) -> None:
        """Create a client.

        Args:
            api_keys: API keys to rotate through.
            proxy: Optional proxy URL used for both http and https.
            api_proxy: Accepted for compatibility; not used by this class.
            max_tokens: Token budget used when trimming conversations.
            temperature: Default sampling temperature sent with requests.
            top_p: Default nucleus-sampling value sent with requests.
            model_name: Model identifier sent with requests.
            reply_count: Default number of completions requested (``n``).
            system_prompt: Default system message for new conversations.
            lastAPICallTime: Timestamp recorded as the "last use" of every
                key; defaults to 100 seconds before construction.
            apiTimeInterval: Minimum seconds between two uses of one key.

        Raises:
            Exception: If the system prompt alone exceeds ``max_tokens``.
        """
        # Resolve the default at call time; a default written in the
        # signature would be frozen at import time.
        if lastAPICallTime is None:
            lastAPICallTime = time.time() - 100
        self.model_name = model_name
        self.system_prompt = system_prompt
        self.apiTimeInterval = apiTimeInterval
        self.session = requests.Session()
        # Priority queue ordered by last-use time: the key that has rested
        # the longest is always handed out first.
        self.api_keys = PQ()
        for key in api_keys:
            self.api_keys.put((lastAPICallTime, key))
        self.proxy = proxy
        if self.proxy:
            self.session.proxies = {
                "http": self.proxy,
                "https": self.proxy,
            }
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.reply_count = reply_count
        # Number of characters removed per step when shrinking text.
        self.decrease_step = 250
        self.conversation = {}
        if self.token_str(self.system_prompt) > self.max_tokens:
            raise Exception("System prompt is too long")
        self.lock = threading.Lock()

    def get_api_key(self):
        """Return the least recently used key, sleeping if it was used too recently.

        Raises:
            RuntimeError: If the client was created without any key.
        """
        with self.lock:
            # Keys are taken and put back under this lock, so an empty queue
            # means no key was ever configured; get() would block forever.
            if self.api_keys.empty():
                raise RuntimeError("No API key configured")
            apiKey = self.api_keys.get()
            time.sleep(self._calculate_delay(apiKey))
            self.api_keys.put((time.time(), apiKey[1]))
            return apiKey[1]

    def _calculate_delay(self, apiKey):
        """Return the seconds left before ``apiKey`` (a ``(time, key)`` pair) may be reused."""
        elapsed_time = time.time() - apiKey[0]
        if elapsed_time < self.apiTimeInterval:
            return self.apiTimeInterval - elapsed_time
        return 0

    def add_to_conversation(self, message: str, role: str, convo_id: str = "default"):
        """Append a message to ``convo_id``, creating the conversation if needed."""
        if convo_id not in self.conversation:
            self.reset(convo_id)
        self.conversation[convo_id].append({"role": role, "content": message})

    def __truncate_conversation(self, convo_id: str = "default"):
        """Shrink the newest message until history plus message fit ``max_tokens``.

        The newest message is shortened from its end in steps of
        ``decrease_step`` characters. If the earlier history alone is over
        budget it is first replaced by a summary.
        """
        last_dialog = self.conversation[convo_id][-1]
        query = str(last_dialog['content'])
        if self.token_str(query) > self.max_tokens:
            # Coarse first cut before the step-wise trimming below.
            query = query[:int(1.5 * self.max_tokens)]
        while query and self.token_str(query) > self.max_tokens:
            query = query[:-self.decrease_step]

        self.conversation[convo_id] = self.conversation[convo_id][:-1]
        history = "\n".join(str(x["content"]) for x in self.conversation[convo_id])
        if self.token_str(history) > self.max_tokens:
            self.conversation_summary(convo_id=convo_id)

        # The history no longer changes, so its text is built only once.
        history = "".join(
            str(x["content"]) + "\n" for x in reversed(self.conversation[convo_id])
        )
        # Stop at an empty query so an over-budget history cannot loop forever.
        while query and self.token_str(history + query) > self.max_tokens:
            query = query[:-self.decrease_step]

        last_dialog['content'] = query
        self.conversation[convo_id].append(last_dialog)

    def ask_stream(
        self,
        prompt: str,
        role: str = "user",
        convo_id: str = "default",
        **kwargs,
    ) -> Generator:
        """Send ``prompt`` and yield the reply text piece by piece.

        The prompt is always stored in the history as a ``"user"`` message;
        ``role`` is forwarded as the request's ``user`` field. Keyword
        overrides: ``api_key``, ``temperature``, ``top_p``, ``n``.

        Raises:
            Exception: If the HTTP status is not 200.
        """
        if convo_id not in self.conversation:
            self.reset(convo_id=convo_id)
        self.add_to_conversation(prompt, "user", convo_id=convo_id)
        self.__truncate_conversation(convo_id=convo_id)
        apiKey = self.get_api_key()
        response = self.session.post(
            "https://api.openai.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {kwargs.get('api_key', apiKey)}"},
            json={
                "model": self.model_name,
                "messages": self.conversation[convo_id],
                "stream": True,
                # kwargs
                "temperature": kwargs.get("temperature", self.temperature),
                "top_p": kwargs.get("top_p", self.top_p),
                "n": kwargs.get("n", self.reply_count),
                "user": role,
            },
            stream=True,
        )
        try:
            if response.status_code != 200:
                raise Exception(
                    f"Error: {response.status_code} {response.reason} {response.text}",
                )
            for line in response.iter_lines():
                if not line:
                    continue
                text = line.decode("utf-8")
                # Only "data: ..." lines carry a payload.
                if not text.startswith("data: "):
                    continue
                payload = text[6:]
                if payload == "[DONE]":
                    break
                resp: dict = json.loads(payload)
                choices = resp.get("choices")
                if not choices:
                    continue
                delta = choices[0].get("delta")
                if not delta:
                    continue
                if "content" in delta:
                    yield delta["content"]
        finally:
            # Always release the streamed connection, including on error or
            # when the consumer stops iterating early.
            response.close()

    def ask(self, prompt: str, role: str = "user", convo_id: str = "default", **kwargs):
        """Send ``prompt`` and return the complete reply with token counts.

        Returns:
            A tuple ``(reply_text, prompt_tokens, reply_tokens,
            conversation_tokens)``. The counts are computed locally with the
            module encoder: ``prompt_tokens`` covers ``prompt`` only and
            ``conversation_tokens`` covers every message stored for
            ``convo_id`` after the reply was recorded.
        """
        response = self.ask_stream(
            prompt=prompt,
            role=role,
            convo_id=convo_id,
            **kwargs,
        )
        full_response: str = "".join(response)
        # The reply comes from the model, so it is recorded as "assistant"
        # rather than under the caller's role.
        self.add_to_conversation(full_response, "assistant", convo_id=convo_id)
        usage_token = self.token_str(prompt)
        com_token = self.token_str(full_response)
        total_token = self.token_cost(convo_id=convo_id)
        return full_response, usage_token, com_token, total_token

    def check_api_available(self):
        """Return ``True`` if a tiny probe request with the next key gets HTTP 200."""
        response = self.session.post(
            "https://api.openai.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {self.get_api_key()}"},
            json={
                "model": self.model_name,
                "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "print A"}],
                "stream": True,
                # kwargs
                "temperature": self.temperature,
                "top_p": self.top_p,
                "n": self.reply_count,
                "user": "user",
            },
            stream=True,
        )
        try:
            return response.status_code == 200
        finally:
            # Only the status matters; do not keep the stream open.
            response.close()

    def reset(self, convo_id: str = "default", system_prompt=None):
        """Start ``convo_id`` over with only a system message.

        ``system_prompt`` overrides the client-wide default when given.
        """
        self.conversation[convo_id] = [
            {"role": "system", "content": str(system_prompt or self.system_prompt)},
        ]

    def conversation_summary(self, convo_id: str = "default"):
        """Replace the history of ``convo_id`` with a model-written summary.

        The history is rendered as a transcript, trimmed from its beginning
        until it fits the token budget together with the instructions, and
        sent through the scratch conversation ``'conversationSummary'``.

        Returns:
            The new message list stored for ``convo_id``.
        """
        transcript = ""
        for conv in self.conversation[convo_id]:
            speaker = 'User' if conv["role"] == 'user' else 'ChatGpt'
            transcript += speaker + ' : ' + str(conv['content']) + '\n'
        instructions = "Your goal is to summarize the provided conversation in English. Your summary should be concise and focus on the key information to facilitate better dialogue for the large language model.Ensure that you include all necessary details and relevant information while still reducing the length of the conversation as much as possible. Your summary should be clear and easily understandable for the ChatGpt model providing a comprehensive and concise summary of the conversation."
        if self.token_str(transcript + instructions) > self.max_tokens:
            transcript = transcript[self.token_str(transcript) - self.max_tokens:]
        # Drop the oldest text first; stop once nothing is left to drop.
        while transcript and self.token_str(transcript + instructions) > self.max_tokens:
            transcript = transcript[self.decrease_step:]
        # The transcript has to be part of the request, otherwise the model
        # is asked to summarise nothing.
        prompt = instructions + "\n" + transcript
        self.reset(convo_id='conversationSummary')
        # ask() returns (text, prompt_tokens, reply_tokens, total); only the
        # text belongs in the history.
        summary = self.ask(prompt, convo_id='conversationSummary')[0]
        while summary and self.token_str(summary) > self.max_tokens:
            summary = summary[:-self.decrease_step]
        self.reset(convo_id='conversationSummary', system_prompt='Summariaze our diaglog')
        self.conversation[convo_id] = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": "Summariaze our diaglog"},
            {"role": 'assistant', "content": summary},
        ]
        return self.conversation[convo_id]

    def token_cost(self, convo_id: str = "default"):
        """Return the token count of all message contents of ``convo_id`` joined by newlines."""
        return len(ENCODER.encode("\n".join([x["content"] for x in self.conversation[convo_id]])))

    def token_str(self, content: str):
        """Return the number of tokens the module encoder produces for ``content``."""
        return len(ENCODER.encode(content))
225
def main():
    """Placeholder entry point: performs no work and returns ``None``."""
    return None