Ilyas KHIAT committed on
Commit
eeaf024
·
1 Parent(s): fe370a3

api first commit by me :)

Browse files
Files changed (2) hide show
  1. main.py +2 -1
  2. rag.py +11 -1
main.py CHANGED
@@ -62,7 +62,8 @@ async def upload_file(file: UploadFile, enterprise_data: Json[EnterpriseData]):
62
 
63
  # Assign a new UUID if id is not provided
64
  if enterprise_data.id is None:
65
- enterprise_data.id = f"{enterprise_name}_{uuid4()}"
 
66
 
67
  # Open the file with PyMuPDF
68
  pdf_document = pymupdf.open(stream=contents, filetype="pdf")
 
62
 
63
  # Assign a new UUID if id is not provided
64
  if enterprise_data.id is None:
65
+ clean_name = remove_non_standard_ascii(enterprise_name)
66
+ enterprise_data.id = f"{clean_name}_{uuid4()}"
67
 
68
  # Open the file with PyMuPDF
69
  pdf_document = pymupdf.open(stream=contents, filetype="pdf")
rag.py CHANGED
@@ -8,6 +8,13 @@ from langchain_openai import ChatOpenAI
8
  from langchain_core.output_parsers import StrOutputParser
9
  from langchain_core.prompts import PromptTemplate
10
 
 
 
 
 
 
 
 
11
 
12
 
13
  def get_text_from_content_for_doc(content):
@@ -44,7 +51,8 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
44
  page_content=chunk,
45
  metadata={"filename":filename,"file_type":file_type},
46
  )
47
- uuid = f"{file_name}_{i}"
 
48
  uuids.append(uuid)
49
  documents.append(document)
50
 
@@ -53,6 +61,7 @@ def get_vectorstore(text_chunks,filename, file_type,namespace,index):
53
  return True
54
 
55
  except Exception as e:
 
56
  return False
57
 
58
  def get_retreive_answer(enterprise_id,prompt,index):
@@ -70,6 +79,7 @@ def get_retreive_answer(enterprise_id,prompt,index):
70
  return response
71
 
72
  except Exception as e:
 
73
  return False
74
 
75
  def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini",context:str="",messages = []) :
 
8
  from langchain_core.output_parsers import StrOutputParser
9
  from langchain_core.prompts import PromptTemplate
10
 
11
+ import unicodedata
12
+
13
def remove_non_standard_ascii(input_string: str) -> str:
    """Return *input_string* reduced to a small, strictly ASCII character set.

    The input is NFKD-normalized first, so accented letters decompose into
    a base letter followed by combining marks; the filter then drops the
    marks and keeps the base letter (``"é"`` becomes ``"e"``).

    Only ASCII letters, ASCII digits, the space character and the
    punctuation ``. , ! ?`` are kept; every other character is removed.

    Args:
        input_string: Text to sanitize.

    Returns:
        The sanitized string (possibly empty).
    """
    normalized_string = unicodedata.normalize('NFKD', input_string)
    # Digits are range-checked rather than tested with ``str.isdigit()``:
    # ``isdigit`` is also true for non-ASCII digits (e.g. Arabic-Indic or
    # Devanagari), which NFKD does not map to ASCII and which would
    # therefore leak through the filter.
    return ''.join(
        char
        for char in normalized_string
        if 'a' <= char <= 'z'
        or 'A' <= char <= 'Z'
        or '0' <= char <= '9'
        or char in ' .,!?'
    )
16
+
17
+
18
 
19
 
20
  def get_text_from_content_for_doc(content):
 
51
  page_content=chunk,
52
  metadata={"filename":filename,"file_type":file_type},
53
  )
54
+ clean_filename = remove_non_standard_ascii(file_name)
55
+ uuid = f"{clean_filename}_{i}"
56
  uuids.append(uuid)
57
  documents.append(document)
58
 
 
61
  return True
62
 
63
  except Exception as e:
64
+ print(e)
65
  return False
66
 
67
  def get_retreive_answer(enterprise_id,prompt,index):
 
79
  return response
80
 
81
  except Exception as e:
82
+ print(e)
83
  return False
84
 
85
  def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini",context:str="",messages = []) :