Spaces:

Linhz
/

ViMNer

Runtime error

File size: 4,329 Bytes

6fd15ad

import re
import os
from Model.NER.VLSP2021.Predict_Ner import  ViTagger,normalize_text
def process_text(text):
    """Return ``text`` with surrounding whitespace removed and every run of
    whitespace inside it collapsed to a single space."""
    trimmed = text.strip()
    return re.sub(r'\s+', ' ', trimmed)

# Sample Vietnamese news text intended as input for process_text
# (the actual call below is left commented out).
text = """

Trang Footballogue vừa đăng tải đoạn video được cho là quay ở phòng tập thể dục của CLB Al Nassr vào hôm 7/8. Trong đoạn video đó, C.Ronaldo vẫn miệt mài tập luyện một mình, dù cho cả đội đã ra về từ lâu.



Tờ báo này bình luận: "Khi tất cả các đồng đội ở Al Nassr ra về, C.Ronaldo vẫn miệt mài tập luyện. Kỷ luật của CR7 thật đáng ngưỡng mộ khi cầu thủ này đã có trong tay mọi thứ".



Trên trang Twitter, những người hâm mộ đã bày tỏ sự thán phục sự chăm chỉ và chuyên nghiệp của C.Ronaldo. Dưới đây là một vài dòng bình luận:



"C.Ronaldo là biểu tượng của sự tận hiến trong bóng đá".



"Ở tuổi 38, khi nhiều cầu thủ treo giày, C.Ronaldo vẫn miệt mài tập luyện. Bạn sẽ không tìm cầu thủ thứ hai trong lịch sử như vậy".

"""

# processed_text = process_text(text)
# print(processed_text)

# Entity label names for the VLSP2021 NER tag set, stored as a plain list.
# NOTE(review): despite the "LABEL2ID" name this is a list, not a mapping;
# presumably a label's list index serves as its id — confirm against the
# model code before reordering or editing entries.
# NOTE(review): 'PERSON' and 'PERSONTYPE' each appear twice in this list;
# verify whether the duplicates are intentional (removing them would shift
# the index of every later label).
LABEL2ID_VLSP2021 = ['O', 'LOCATION-GPE', 'QUANTITY-NUM', 'EVENT-CUL', 'DATETIME', 'PERSONTYPE', 'PERSON', 'QUANTITY-PER', 'ORGANIZATION', 'LOCATION-GEO', 'LOCATION-STRUC', 'PRODUCT-COM', 'DATETIME-DATE', 'QUANTITY-DIM', 'PRODUCT', 'QUANTITY', 'DATETIME-DURATION', 'PERSON', 'QUANTITY-CUR', 'DATETIME-TIME', 'QUANTITY-TEM', 'DATETIME-TIMERANGE', 'EVENT-GAMESHOW', 'QUANTITY-AGE', 'QUANTITY-ORD', 'PRODUCT-LEGAL', 'PERSONTYPE', 'LOCATION', 'ORGANIZATION-MED', 'URL', 'PHONENUMBER', 'ORGANIZATION-SPORTS', 'EVENT-SPORT', 'SKILL', 'EVENT-NATURAL', 'ADDRESS', 'IP', 'EMAIL', 'ORGANIZATION-STOCK', 'DATETIME-SET', 'PRODUCT-AWARD', 'MISCELLANEOUS', 'LOCATION-GPE-GEO']
# print(len(LABEL2ID_VLSP2021))

def save_uploaded_image(image, directory):
    """Write an uploaded image's bytes to a file inside ``directory``.

    Args:
        image: Upload object exposing a ``name`` attribute (the file name)
            and a ``getbuffer()`` method returning the raw bytes.
        directory: Destination folder; created together with any missing
            parents. An empty string means the current working directory.

    Raises:
        ValueError: If ``image.name`` does not contain a usable file name.
    """
    # Keep only the final path component (treating "\\" as a separator too)
    # so a crafted name such as "../../x" cannot escape ``directory``.
    file_name = os.path.basename(image.name.replace("\\", "/"))
    if file_name in ("", ".", ".."):
        raise ValueError(f"Invalid uploaded file name: {image.name!r}")

    # exist_ok avoids the check-then-create race; os.makedirs("") would
    # raise, so an empty directory is simply skipped.
    if directory:
        os.makedirs(directory, exist_ok=True)

    file_path = os.path.join(directory, file_name)
    with open(file_path, "wb") as f:
        f.write(image.getbuffer())
# def convert_text_to_txt(text,file_path):
#     # Gộp các dòng văn bản thành một đoạn văn
#     paragraph = text.replace('\n', ' ')
#
#     # Sử dụng biểu thức chính quy để tách từ và dấu câu
#     words_list = re.findall(r'\w+|[.,]', paragraph)
#     with open(file_path, 'w', encoding='utf-8') as file:
#         for word in words_list:
#             file.write(word + '\n')
#     return words_list



# # Văn bản mẫu
# text = """Toi ten la Minh"""
# # Sử dụng hàm để chuyển đổi văn bản
# sa='E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/list.txt'
# convert_text_to_txt(text ,sa)

def add_string_to_txt(string, file_path):
    """Prepend an ``IMGID:<name>`` header line to an existing text file.

    ``<name>`` is the part of ``string`` before its first dot, so
    ``"namngo.jpg"`` produces ``IMGID:namngo``. The file at ``file_path``
    is read as UTF-8 and rewritten with the header as its first line.
    """
    # Everything up to the first '.' is used as the image id.
    file_name, _, _ = string.partition('.')
    header = f"IMGID:{file_name}\n"

    with open(file_path, 'r', encoding='utf-8') as src:
        existing = src.read()

    # Rewrite the file: header first, previous content after it.
    with open(file_path, 'w', encoding='utf-8') as dst:
        dst.write(header + existing)

# string= 'namngo.jpg'
# add_string_to_txt(string, sa)
# # In kết quả


import os
import re

def convert_text_to_txt(text, file_path):
    """Tokenize ``text`` and write one token per line to ``file_path``.

    Newlines in ``text`` are replaced by spaces, then tokens are extracted:
    each run of word characters, and each individual ``.`` or ``,``. Other
    punctuation is dropped. The file is written as UTF-8 and its parent
    directory is created when missing.

    Args:
        text: Input string to tokenize.
        file_path: Destination file; may be a bare file name.

    Returns:
        The list of extracted tokens, in order.
    """
    # Merge lines of text into a single paragraph
    paragraph = text.replace('\n', ' ')

    # Separate words and the punctuation marks '.' and ','
    words_list = re.findall(r'\w+|[.,]', paragraph)

    # Ensure the directory exists. dirname() is '' for a bare file name and
    # os.makedirs('') raises, so only create when there is a directory part.
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Write one token per line
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(f"{word}\n" for word in words_list)

    return words_list

# Example usage
# text = "This is some example text."
# output_file_path = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Filetxt/output.txt'
# convert_text_to_txt(text, output_file_path)