Spaces:

seanpedrickcase
/

document_redaction

Running

App Files Files Community

seanpedrickcase committed on Jan 15

Commit

c3a8cd7

1 Parent(s): 6ac4be4

Output redaction boxes can now be drawn in grey, controlled by an environment variable. Review files are now saved every time the page is changed.

Browse files

Files changed (4) hide show

app.py +21 -8
tools/auth.py +3 -3
tools/file_conversion.py +7 -3
tools/redaction_review.py +52 -57

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from gradio_image_annotation.image_annotator import AnnotatedImageData
 from tools.helper_functions import ensure_output_folder_exists, add_folder_to_path, put_columns_in_df, get_connection_params, output_folder, get_or_create_env_var, reveal_feedback_buttons, custom_regex_load, reset_state_vars, load_in_default_allow_list, tesseract_ocr_option, text_ocr_option, textract_option, local_pii_detector, aws_pii_detector
 from tools.aws_functions import upload_file_to_s3, download_file_from_s3, RUN_AWS_FUNCTIONS, bucket_name
 from tools.file_redaction import choose_and_run_redactor
-from tools.file_conversion import prepare_image_or_pdf, get_input_file_names
 from tools.redaction_review import apply_redactions, modify_existing_page_redactions, decrease_page, increase_page, update_annotator, update_zoom, update_entities_df, df_select_callback
 from tools.data_anonymise import anonymise_data_files
 from tools.auth import authenticate_user
@@ -41,6 +41,8 @@ full_entity_list = ["TITLES", "PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "STREET
 language = 'en'
 host_name = socket.gethostname()
 feedback_logs_folder = 'feedback/' + today_rev + '/' + host_name + '/'
 access_logs_folder = 'logs/' + today_rev + '/' + host_name + '/'
@@ -84,6 +86,8 @@ with app:
     log_files_output_list_state = gr.State([])
     review_file_state = gr.State(pd.DataFrame())
     # Logging state
     log_file_name = 'log.csv'
@@ -197,7 +201,7 @@ with app:
     # Object annotation
     with gr.Tab("Review redactions", id="tab_object_annotation"):
-        with gr.Accordion(label = "Review redaction file", open=False):
             output_review_files = gr.File(label="Review output files", file_count='multiple', height=file_input_height)
             upload_previous_review_file_btn = gr.Button("Review previously created redaction file (upload original PDF and ...review_file.csv)")
@@ -344,14 +348,18 @@ with app:
     # Page controls at top
     annotate_current_page.submit(
         modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
-        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base])
     annotation_last_page_button.click(fn=decrease_page, inputs=[annotate_current_page], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
-        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base])
     annotation_next_page_button.click(fn=increase_page, inputs=[annotate_current_page, all_image_annotations_state], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
-        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base])
     # Zoom in and out on annotator
     annotate_zoom_in.click(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
@@ -368,18 +376,23 @@ with app:
     #annotation_button_get.click(get_boxes_json, annotator, json_boxes)
     annotation_button_apply.click(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output], scroll_to_output=True)
     # Page controls at bottom
     annotate_current_page_bottom.submit(
         modify_existing_page_redactions, inputs = [annotator, annotate_current_page_bottom, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
-        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base])
     annotation_last_page_button_bottom.click(fn=decrease_page, inputs=[annotate_current_page], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
-        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base])
     annotation_next_page_button_bottom.click(fn=increase_page, inputs=[annotate_current_page, all_image_annotations_state], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
-        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base])
     # Review side bar controls
     recogniser_entity_dropdown.select(update_entities_df, inputs=[recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs=[recogniser_entity_dataframe])

 from tools.helper_functions import ensure_output_folder_exists, add_folder_to_path, put_columns_in_df, get_connection_params, output_folder, get_or_create_env_var, reveal_feedback_buttons, custom_regex_load, reset_state_vars, load_in_default_allow_list, tesseract_ocr_option, text_ocr_option, textract_option, local_pii_detector, aws_pii_detector
 from tools.aws_functions import upload_file_to_s3, download_file_from_s3, RUN_AWS_FUNCTIONS, bucket_name
 from tools.file_redaction import choose_and_run_redactor
+from tools.file_conversion import prepare_image_or_pdf, get_input_file_names, CUSTOM_BOX_COLOUR
 from tools.redaction_review import apply_redactions, modify_existing_page_redactions, decrease_page, increase_page, update_annotator, update_zoom, update_entities_df, df_select_callback
 from tools.data_anonymise import anonymise_data_files
 from tools.auth import authenticate_user
 language = 'en'
 host_name = socket.gethostname()
 feedback_logs_folder = 'feedback/' + today_rev + '/' + host_name + '/'
 access_logs_folder = 'logs/' + today_rev + '/' + host_name + '/'
     log_files_output_list_state = gr.State([])
     review_file_state = gr.State(pd.DataFrame())
+    do_not_save_pdf_state = gr.State(False)
     # Logging state
     log_file_name = 'log.csv'
     # Object annotation
     with gr.Tab("Review redactions", id="tab_object_annotation"):
+        with gr.Accordion(label = "Review redaction file", open=True):
             output_review_files = gr.File(label="Review output files", file_count='multiple', height=file_input_height)
             upload_previous_review_file_btn = gr.Button("Review previously created redaction file (upload original PDF and ...review_file.csv)")
     # Page controls at top
     annotate_current_page.submit(
         modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
+        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base]).\
+        then(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state, do_not_save_pdf_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output])
     annotation_last_page_button.click(fn=decrease_page, inputs=[annotate_current_page], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
+        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base]).\
+        then(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state, do_not_save_pdf_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output])
     annotation_next_page_button.click(fn=increase_page, inputs=[annotate_current_page, all_image_annotations_state], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
+        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base]).\
+        then(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state, do_not_save_pdf_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output])
     # Zoom in and out on annotator
     annotate_zoom_in.click(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
     #annotation_button_get.click(get_boxes_json, annotator, json_boxes)
     annotation_button_apply.click(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output], scroll_to_output=True)
+    do_not_save_pdf_state
     # Page controls at bottom
     annotate_current_page_bottom.submit(
         modify_existing_page_redactions, inputs = [annotator, annotate_current_page_bottom, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
+        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base]).\
+        then(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state, do_not_save_pdf_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output])
     annotation_last_page_button_bottom.click(fn=decrease_page, inputs=[annotate_current_page], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
+        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base]).\
+        then(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state, do_not_save_pdf_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output])
     annotation_next_page_button_bottom.click(fn=increase_page, inputs=[annotate_current_page, all_image_annotations_state], outputs=[annotate_current_page, annotate_current_page_bottom]).\
         then(modify_existing_page_redactions, inputs = [annotator, annotate_current_page, annotate_previous_page, all_image_annotations_state, recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs = [all_image_annotations_state, annotate_previous_page, annotate_current_page_bottom, recogniser_entity_dropdown, recogniser_entity_dataframe_base]).\
+        then(update_annotator, inputs=[all_image_annotations_state, annotate_current_page, recogniser_entity_dropdown, recogniser_entity_dataframe_base, annotator_zoom_number], outputs = [annotator, annotate_current_page, annotate_current_page_bottom, annotate_previous_page, recogniser_entity_dropdown, recogniser_entity_dataframe, recogniser_entity_dataframe_base]).\
+        then(apply_redactions, inputs=[annotator, doc_full_file_name_textbox, pdf_doc_state, all_image_annotations_state, annotate_current_page, review_file_state, do_not_save_pdf_state], outputs=[pdf_doc_state, all_image_annotations_state, output_review_files, log_files_output])
     # Review side bar controls
     recogniser_entity_dropdown.select(update_entities_df, inputs=[recogniser_entity_dropdown, recogniser_entity_dataframe_base], outputs=[recogniser_entity_dataframe])

tools/auth.py CHANGED Viewed

@@ -7,13 +7,13 @@ import base64
 from tools.helper_functions import get_or_create_env_var
 client_id = get_or_create_env_var('AWS_CLIENT_ID', '')
-print(f'The value of AWS_CLIENT_ID is {client_id}')
 client_secret = get_or_create_env_var('AWS_CLIENT_SECRET', '')
-print(f'The value of AWS_CLIENT_SECRET is {client_secret}')
 user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
-print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
 def calculate_secret_hash(client_id, client_secret, username):
     message = username + client_id

 from tools.helper_functions import get_or_create_env_var
 client_id = get_or_create_env_var('AWS_CLIENT_ID', '')
+#print(f'The value of AWS_CLIENT_ID is {client_id}')
 client_secret = get_or_create_env_var('AWS_CLIENT_SECRET', '')
+#print(f'The value of AWS_CLIENT_SECRET is {client_secret}')
 user_pool_id = get_or_create_env_var('AWS_USER_POOL_ID', '')
+#print(f'The value of AWS_USER_POOL_ID is {user_pool_id}')
 def calculate_secret_hash(client_id, client_secret, username):
     message = username + client_id

tools/file_conversion.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from pdf2image import convert_from_path, pdfinfo_from_path
-from tools.helper_functions import get_file_path_end, output_folder, tesseract_ocr_option, text_ocr_option, textract_option, local_pii_detector, aws_pii_detector, read_file
 from PIL import Image, ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 import os
@@ -48,7 +48,8 @@ def is_pdf(filename):
 # %%
 ## Convert pdf to image if necessary
 def process_single_page(pdf_path: str, page_num: int, image_dpi: float, output_dir: str = 'input') -> tuple[int, str]:
     try:
@@ -261,7 +262,10 @@ def redact_single_box(pymupdf_page:Page, pymupdf_rect:Rect, img_annotation_box:d
         else:
             out_colour = img_annotation_box["color"]
     else:
-        out_colour = (0,0,0)
     shape.finish(color=out_colour, fill=out_colour)  # Black fill for the rectangle
     #shape.finish(color=(0, 0, 0))  # Black fill for the rectangle

 from pdf2image import convert_from_path, pdfinfo_from_path
+from tools.helper_functions import get_file_path_end, output_folder, tesseract_ocr_option, text_ocr_option, textract_option, read_file, get_or_create_env_var
 from PIL import Image, ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 import os
 # %%
 ## Convert pdf to image if necessary
+CUSTOM_BOX_COLOUR = get_or_create_env_var("CUSTOM_BOX_COLOUR", "")
+print(f'The value of CUSTOM_BOX_COLOUR is {CUSTOM_BOX_COLOUR}')
 def process_single_page(pdf_path: str, page_num: int, image_dpi: float, output_dir: str = 'input') -> tuple[int, str]:
     try:
         else:
             out_colour = img_annotation_box["color"]
     else:
+        if CUSTOM_BOX_COLOUR == "grey":
+            out_colour = (0.5, 0.5, 0.5)
+        else:
+            out_colour = (0,0,0)
     shape.finish(color=out_colour, fill=out_colour)  # Black fill for the rectangle
     #shape.finish(color=(0, 0, 0))  # Black fill for the rectangle

tools/redaction_review.py CHANGED Viewed

@@ -55,12 +55,6 @@ def update_annotator(image_annotator_object:AnnotatedImageData, page_num:int, re
     '''
     recogniser_entities = []
     recogniser_dataframe = pd.DataFrame()
-    #recogniser_entities_drop = gr.Dropdown(value="ALL", allow_custom_value=True)
-    #recogniser_dataframe_gr = gr.Dataframe(pd.DataFrame(data={"page":[""], "label":[""]}))
-    #print("recogniser_dataframe_gr", recogniser_dataframe_gr)
-    #print("recogniser_dataframe_gr shape", recogniser_dataframe_gr.shape)
-    #print("recogniser_dataframe_gr.iloc[0,0]:",  recogniser_dataframe_gr.iloc[0,0])
     if recogniser_dataframe_gr.iloc[0,0] == "":
         try:
@@ -228,7 +222,7 @@ def modify_existing_page_redactions(image_annotated:AnnotatedImageData, current_
     return all_image_annotations, current_page, current_page, recogniser_entities_drop, recogniser_dataframe_out
-def apply_redactions(image_annotated:AnnotatedImageData, file_paths:List[str], doc:Document, all_image_annotations:List[AnnotatedImageData], current_page:int, review_file_state, progress=gr.Progress(track_tqdm=True)):
     '''
     Apply modified redactions to a pymupdf and export review files
     '''
@@ -251,75 +245,76 @@ def apply_redactions(image_annotated:AnnotatedImageData, file_paths:List[str], d
         file_paths = [file_paths]
     for file_path in file_paths:
-        print("file_path:", file_path)
         file_base = get_file_path_end(file_path)
         file_extension = os.path.splitext(file_path)[1].lower()
-        # If working with image docs
-        if (is_pdf(file_path) == False) & (file_extension not in '.csv'):
-            image = Image.open(file_paths[-1])
-            #image = pdf_doc
-            draw = ImageDraw.Draw(image)
-            for img_annotation_box in image_annotated['boxes']:
-                coords = [img_annotation_box["xmin"],
-                img_annotation_box["ymin"],
-                img_annotation_box["xmax"],
-                img_annotation_box["ymax"]]
-                fill = img_annotation_box["color"]
-                draw.rectangle(coords, fill=fill)
-                image.save(output_folder + file_base + "_redacted.png")
-            doc = [image]
-        elif file_extension in '.csv':
-            print("This is a csv")
-            pdf_doc = []
-        # If working with pdfs
-        elif is_pdf(file_path) == True:
-            pdf_doc = pymupdf.open(file_path)
-            number_of_pages = pdf_doc.page_count
-            print("Saving pages to file.")
-            for i in progress.tqdm(range(0, number_of_pages), desc="Saving redactions to file", unit = "pages"):
-                #print("Saving page", str(i))
-                image_loc = all_image_annotations[i]['image']
-                #print("Image location:", image_loc)
-                # Load in image object
-                if isinstance(image_loc, np.ndarray):
-                    image = Image.fromarray(image_loc.astype('uint8'))
-                    #all_image_annotations[i]['image'] = image_loc.tolist()
-                elif isinstance(image_loc, Image.Image):
-                    image = image_loc
-                    #image_out_folder = output_folder + file_base + "_page_" + str(i) + ".png"
-                    #image_loc.save(image_out_folder)
-                    #all_image_annotations[i]['image'] = image_out_folder
-                elif isinstance(image_loc, str):
-                    image = Image.open(image_loc)
-                pymupdf_page = pdf_doc.load_page(i) #doc.load_page(current_page -1)
-                pymupdf_page = redact_page_with_pymupdf(pymupdf_page, all_image_annotations[i], image)
-        else:
-            print("File type not recognised.")
-        #try:
-        if pdf_doc:
-            out_pdf_file_path = output_folder + file_base + "_redacted.pdf"
-            pdf_doc.save(out_pdf_file_path)
-            output_files.append(out_pdf_file_path)
         try:
             print("Saving annotations to JSON")
@@ -331,7 +326,7 @@ def apply_redactions(image_annotated:AnnotatedImageData, file_paths:List[str], d
             print("Saving annotations to CSV review file")
-            print("review_file_state:", review_file_state)
             # Convert json to csv and also save this
             review_df = convert_review_json_to_pandas_df(all_image_annotations, review_file_state)

     '''
     recogniser_entities = []
     recogniser_dataframe = pd.DataFrame()
     if recogniser_dataframe_gr.iloc[0,0] == "":
         try:
     return all_image_annotations, current_page, current_page, recogniser_entities_drop, recogniser_dataframe_out
+def apply_redactions(image_annotated:AnnotatedImageData, file_paths:List[str], doc:Document, all_image_annotations:List[AnnotatedImageData], current_page:int, review_file_state, save_pdf:bool=True, progress=gr.Progress(track_tqdm=True)):
     '''
     Apply modified redactions to a pymupdf and export review files
     '''
         file_paths = [file_paths]
     for file_path in file_paths:
+        #print("file_path:", file_path)
         file_base = get_file_path_end(file_path)
         file_extension = os.path.splitext(file_path)[1].lower()
+        if save_pdf == True:
+            # If working with image docs
+            if (is_pdf(file_path) == False) & (file_extension not in '.csv'):
+                image = Image.open(file_paths[-1])
+                #image = pdf_doc
+                draw = ImageDraw.Draw(image)
+                for img_annotation_box in image_annotated['boxes']:
+                    coords = [img_annotation_box["xmin"],
+                    img_annotation_box["ymin"],
+                    img_annotation_box["xmax"],
+                    img_annotation_box["ymax"]]
+                    fill = img_annotation_box["color"]
+                    draw.rectangle(coords, fill=fill)
+                    image.save(output_folder + file_base + "_redacted.png")
+                doc = [image]
+            elif file_extension in '.csv':
+                print("This is a csv")
+                pdf_doc = []
+            # If working with pdfs
+            elif is_pdf(file_path) == True:
+                pdf_doc = pymupdf.open(file_path)
+                number_of_pages = pdf_doc.page_count
+                print("Saving pages to file.")
+                for i in progress.tqdm(range(0, number_of_pages), desc="Saving redactions to file", unit = "pages"):
+                    #print("Saving page", str(i))
+                    image_loc = all_image_annotations[i]['image']
+                    #print("Image location:", image_loc)
+                    # Load in image object
+                    if isinstance(image_loc, np.ndarray):
+                        image = Image.fromarray(image_loc.astype('uint8'))
+                        #all_image_annotations[i]['image'] = image_loc.tolist()
+                    elif isinstance(image_loc, Image.Image):
+                        image = image_loc
+                        #image_out_folder = output_folder + file_base + "_page_" + str(i) + ".png"
+                        #image_loc.save(image_out_folder)
+                        #all_image_annotations[i]['image'] = image_out_folder
+                    elif isinstance(image_loc, str):
+                        image = Image.open(image_loc)
+                    pymupdf_page = pdf_doc.load_page(i) #doc.load_page(current_page -1)
+                    pymupdf_page = redact_page_with_pymupdf(pymupdf_page, all_image_annotations[i], image)
+            else:
+                print("File type not recognised.")
+            #try:
+            if pdf_doc:
+                out_pdf_file_path = output_folder + file_base + "_redacted.pdf"
+                pdf_doc.save(out_pdf_file_path)
+                output_files.append(out_pdf_file_path)
         try:
             print("Saving annotations to JSON")
             print("Saving annotations to CSV review file")
+            #print("review_file_state:", review_file_state)
             # Convert json to csv and also save this
             review_df = convert_review_json_to_pandas_df(all_image_annotations, review_file_state)