File size: 2,950 Bytes
f1f9265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# @Author:  Pevernow ([email protected])
# @Date:    2025/1/5
# @License: (Follow the main project)
import json
import os
import tarfile

from PIL import Image, PngImagePlugin

PngImagePlugin.MAX_TEXT_CHUNK = 100 * 1024 * 1024  # Increase maximum size for text chunks


def process_data(input_dir, output_tar_name="output.tar"):
    """
    Processes a directory containing PNG files, generates corresponding JSON files,
    and packages all files into a TAR file. It also counts the number of processed PNG images,
    and saves the height and width of each PNG file to the JSON.

    Args:
        input_dir (str): The input directory containing PNG files.
        output_tar_name (str): The name of the output TAR file (default is "output.tar").
    """
    png_count = 0
    json_files_created = []

    for filename in os.listdir(input_dir):
        if filename.lower().endswith(".png"):
            png_count += 1
            base_name = filename[:-4]  # Remove the ".png" extension
            txt_filename = os.path.join(input_dir, base_name + ".txt")
            json_filename = base_name + ".json"
            json_filepath = os.path.join(input_dir, json_filename)
            png_filepath = os.path.join(input_dir, filename)

            if os.path.exists(txt_filename):
                try:
                    # Get the dimensions of the PNG image
                    with Image.open(png_filepath) as img:
                        width, height = img.size

                    with open(txt_filename, encoding="utf-8") as f:
                        caption_content = f.read().strip()

                    data = {"file_name": filename, "prompt": caption_content, "width": width, "height": height}

                    with open(json_filepath, "w", encoding="utf-8") as outfile:
                        json.dump(data, outfile, indent=4, ensure_ascii=False)

                    print(f"Generated: {json_filename}")
                    json_files_created.append(json_filepath)

                except Exception as e:
                    print(f"Error processing file {filename}: {e}")
            else:
                print(f"Warning: No corresponding TXT file found for {filename}.")

    # Create a TAR file and include all files
    with tarfile.open(output_tar_name, "w") as tar:
        for item in os.listdir(input_dir):
            item_path = os.path.join(input_dir, item)
            tar.add(item_path, arcname=item)  # arcname maintains the relative path of the file in the tar

    print(f"\nAll files have been packaged into: {output_tar_name}")
    print(f"Number of PNG images processed: {png_count}")


if __name__ == "__main__":
    input_directory = input("Please enter the directory path containing PNG and TXT files: ")
    output_tar_filename = (
        input("Please enter the name of the output TAR file (default is output.tar): ") or "output.tar"
    )
    process_data(input_directory, output_tar_filename)