[python]批次將資料夾內的檔案轉換成UTF-8

type
status
date
slug
summary
python


import os
import codecs
import chardet

# Configuration: the folder to scan and which file extensions to convert.
input_folder = r'D:\Data\temp'  # Folder whose files are converted; change this to your own path
extensions = []  # Filename suffixes to convert; an empty list converts every file
# Example: extensions = ['.cpp', '.h'] converts only files ending in .cpp or .h

def detect_encoding(file_path):
    """Return chardet's best guess for the text encoding of *file_path*.

    The whole file is read in binary mode and handed to ``chardet.detect``;
    only the ``'encoding'`` entry of its result is returned. Per chardet's
    documentation that entry may be ``None`` when no guess can be made.
    """
    with open(file_path, 'rb') as handle:
        guess = chardet.detect(handle.read())
    return guess['encoding']

def convert_to_utf8(input_folder, extensions):
    """Write UTF-8 copies of the files in *input_folder* to an ``output`` subfolder.

    Only regular files directly inside *input_folder* are processed;
    directories (including the ``output`` folder itself) are skipped. Each
    file's encoding is guessed with ``detect_encoding``; bytes that cannot be
    decoded with the guessed encoding are dropped (``errors='ignore'``).
    Files whose guessed encoding has no Python codec are skipped with a
    message instead of aborting the whole run.

    Args:
        input_folder: Path of the folder whose files are converted.
        extensions: Filename suffixes to convert (e.g. ``['.cpp', '.h']``).
            An empty value converts every file.
    """
    output_folder = os.path.join(input_folder, "output")
    # exist_ok avoids the race between a separate exists() check and makedirs().
    os.makedirs(output_folder, exist_ok=True)

    # str.endswith accepts a tuple of suffixes, so build it once up front.
    convert_all = not extensions
    suffixes = tuple(extensions or ())

    for filename in os.listdir(input_folder):
        input_file_path = os.path.join(input_folder, filename)

        # Skip directories and anything else that is not a regular file.
        if not os.path.isfile(input_file_path):
            continue
        # When extensions are given, only files ending with one of them qualify.
        if not convert_all and not filename.endswith(suffixes):
            continue

        output_file_path = os.path.join(output_folder, filename)

        # Detection may yield None when no guess is possible. Passing None to
        # codecs.open would silently fall back to a plain text-mode file using
        # the platform default encoding with strict error handling, so use
        # UTF-8 explicitly instead.
        encoding = detect_encoding(input_file_path) or 'utf-8'

        # The detector can report an encoding name that Python has no codec
        # for; skip such a file rather than crash the whole batch.
        try:
            codecs.lookup(encoding)
        except LookupError:
            print(f"Skipping {filename}: unsupported encoding {encoding!r}")
            continue

        # Decode with the detected encoding, dropping undecodable bytes.
        with codecs.open(input_file_path, 'r', encoding=encoding, errors='ignore') as file:
            content = file.read()

        # Re-encode the text as UTF-8 under the same file name in the output folder.
        with codecs.open(output_file_path, 'w', encoding='utf-8') as output_file:
            output_file.write(content)

    print(f"All specified files have been converted and saved to {output_folder}")

# Run the conversion using the folder and extension settings defined above.
convert_to_utf8(input_folder=input_folder, extensions=extensions)
python

JK Yang