Hieucyber2208 committed on
Commit
6ad42a4
·
verified ·
1 Parent(s): 59fa5a3

Update src/text_processing.py

Browse files
Files changed (1) hide show
  1. src/text_processing.py +3 -3
src/text_processing.py CHANGED
@@ -122,7 +122,7 @@ def generate_explaination_for_chunks(chunks, analysis_level='basic', writting_st
122
  def text_processing(file_path, analysis_level='basic', writting_style='academic', word_lower_limit = 100, word_upper_limit = 150):
123
  # Trích xuất văn bản từ file PDF
124
  text = extract_text_from_file(file_path=file_path)
125
- with open("./data/text/text.txt", "w", encoding="utf-8") as f:
126
  f.write(text)
127
  # Tách văn bản theo ngữ nghĩa
128
  semantic_chunks = split_text_by_semantics(text)
@@ -131,7 +131,7 @@ def text_processing(file_path, analysis_level='basic', writting_style='academic'
131
  explanations = generate_explaination_for_chunks(semantic_chunks, analysis_level=analysis_level, writting_style = writting_style, word_lower_limit = word_lower_limit, word_upper_limit=word_upper_limit)
132
 
133
  # Tạo thư mục nếu chưa tồn tại
134
- output_dir = "./data/text/"
135
  os.makedirs(output_dir, exist_ok=True)
136
 
137
  # Lưu từng câu vào file riêng biệt
@@ -148,4 +148,4 @@ def text_processing(file_path, analysis_level='basic', writting_style='academic'
148
  print(f"Đã lưu: {output_file}")
149
  ####################### - MAIN CODE - #######################
150
  if __name__ == "__main__":
151
- text_processing(file_path = "./data/input/sample_3.pdf")
 
122
  def text_processing(file_path, analysis_level='basic', writting_style='academic', word_lower_limit = 100, word_upper_limit = 150):
123
  # Trích xuất văn bản từ file PDF
124
  text = extract_text_from_file(file_path=file_path)
125
+ with open("./text/text.txt", "w", encoding="utf-8") as f:
126
  f.write(text)
127
  # Tách văn bản theo ngữ nghĩa
128
  semantic_chunks = split_text_by_semantics(text)
 
131
  explanations = generate_explaination_for_chunks(semantic_chunks, analysis_level=analysis_level, writting_style = writting_style, word_lower_limit = word_lower_limit, word_upper_limit=word_upper_limit)
132
 
133
  # Tạo thư mục nếu chưa tồn tại
134
+ output_dir = "./text/"
135
  os.makedirs(output_dir, exist_ok=True)
136
 
137
  # Lưu từng câu vào file riêng biệt
 
148
  print(f"Đã lưu: {output_file}")
149
  ####################### - MAIN CODE - #######################
150
  if __name__ == "__main__":
151
+ text_processing(file_path = "./input/sample_3.pdf")