Spaces:
Running
Running
Ilyas KHIAT
committed on
Commit
·
8a6cca6
1
Parent(s):
5596129
vue enhance
Browse files
- utils/audit/audit_doc.py +11 -11
utils/audit/audit_doc.py
CHANGED
@@ -4,14 +4,14 @@ import tiktoken
|
|
4 |
import textstat
|
5 |
from docx import Document
|
6 |
import io
|
7 |
-
from rake_nltk import Rake
|
8 |
-
import nltk
|
9 |
-
from nltk.corpus import stopwords
|
10 |
from openai import OpenAI
|
11 |
|
12 |
# Download NLTK stopwords
|
13 |
-
nltk.download('stopwords')
|
14 |
-
nltk.download('punkt')
|
15 |
|
16 |
#function to use gpt4o-mini
|
17 |
def extract_relevant_keywords(prompt: str) -> str:
|
@@ -65,10 +65,10 @@ def evaluate_text_quality(text: str) -> dict:
|
|
65 |
# Scale the global score to 0-5
|
66 |
global_score_0_5 = global_score * 5
|
67 |
|
68 |
-
def extract_keywords(text):
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
73 |
|
74 |
|
@@ -168,8 +168,8 @@ def audit_descriptif_pdf(file,max_img_width) -> dict:
|
|
168 |
|
169 |
# Extract key words from the document
|
170 |
text = " ".join([page["texte"] for page in doc_content.values()])
|
171 |
-
key_words = extract_keywords(text)
|
172 |
-
list_key_words_text = "\n".join(key_words[:10])
|
173 |
prompt = f'''Voici le document:
|
174 |
- {text}
|
175 |
Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.
|
|
|
4 |
import textstat
|
5 |
from docx import Document
|
6 |
import io
|
7 |
+
# from rake_nltk import Rake
|
8 |
+
# import nltk
|
9 |
+
# from nltk.corpus import stopwords
|
10 |
from openai import OpenAI
|
11 |
|
12 |
# Download NLTK stopwords
|
13 |
+
# nltk.download('stopwords')
|
14 |
+
# nltk.download('punkt')
|
15 |
|
16 |
#function to use gpt4o-mini
|
17 |
def extract_relevant_keywords(prompt: str) -> str:
|
|
|
65 |
# Scale the global score to 0-5
|
66 |
global_score_0_5 = global_score * 5
|
67 |
|
68 |
+
# def extract_keywords(text):
|
69 |
+
# rake = Rake(stopwords.words('french'))
|
70 |
+
# rake.extract_keywords_from_text(text)
|
71 |
+
# return rake.get_ranked_phrases()
|
72 |
|
73 |
|
74 |
|
|
|
168 |
|
169 |
# Extract key words from the document
|
170 |
text = " ".join([page["texte"] for page in doc_content.values()])
|
171 |
+
# key_words = extract_keywords(text)
|
172 |
+
# list_key_words_text = "\n".join(key_words[:10])
|
173 |
prompt = f'''Voici le document:
|
174 |
- {text}
|
175 |
Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.
|