Ilyas KHIAT committed on
Commit
8a6cca6
·
1 Parent(s): 5596129

vue enhance

Browse files
Files changed (1) hide show
  1. utils/audit/audit_doc.py +11 -11
utils/audit/audit_doc.py CHANGED
@@ -4,14 +4,14 @@ import tiktoken
4
  import textstat
5
  from docx import Document
6
  import io
7
- from rake_nltk import Rake
8
- import nltk
9
- from nltk.corpus import stopwords
10
  from openai import OpenAI
11
 
12
  # Download NLTK stopwords
13
- nltk.download('stopwords')
14
- nltk.download('punkt')
15
 
16
  #function to use gpt4o-mini
17
  def extract_relevant_keywords(prompt: str) -> str:
@@ -65,10 +65,10 @@ def evaluate_text_quality(text: str) -> dict:
65
  # Scale the global score to 0-5
66
  global_score_0_5 = global_score * 5
67
 
68
- def extract_keywords(text):
69
- rake = Rake(stopwords.words('french'))
70
- rake.extract_keywords_from_text(text)
71
- return rake.get_ranked_phrases()
72
 
73
 
74
 
@@ -168,8 +168,8 @@ def audit_descriptif_pdf(file,max_img_width) -> dict:
168
 
169
  # Extract key words from the document
170
  text = " ".join([page["texte"] for page in doc_content.values()])
171
- key_words = extract_keywords(text)
172
- list_key_words_text = "\n".join(key_words[:10])
173
  prompt = f'''Voici le document:
174
  - {text}
175
  Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.
 
4
  import textstat
5
  from docx import Document
6
  import io
7
+ # from rake_nltk import Rake
8
+ # import nltk
9
+ # from nltk.corpus import stopwords
10
  from openai import OpenAI
11
 
12
  # Download NLTK stopwords
13
+ # nltk.download('stopwords')
14
+ # nltk.download('punkt')
15
 
16
  #function to use gpt4o-mini
17
  def extract_relevant_keywords(prompt: str) -> str:
 
65
  # Scale the global score to 0-5
66
  global_score_0_5 = global_score * 5
67
 
68
+ # def extract_keywords(text):
69
+ # rake = Rake(stopwords.words('french'))
70
+ # rake.extract_keywords_from_text(text)
71
+ # return rake.get_ranked_phrases()
72
 
73
 
74
 
 
168
 
169
  # Extract key words from the document
170
  text = " ".join([page["texte"] for page in doc_content.values()])
171
+ # key_words = extract_keywords(text)
172
+ # list_key_words_text = "\n".join(key_words[:10])
173
  prompt = f'''Voici le document:
174
  - {text}
175
  Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.