type
status
date
slug
summary
tags
category
icon
password
修改以下變數
input_folder為包含檔案的目錄
extensions 設定要轉換的檔案副檔名(例如 ['.cpp', '.h']),留空則轉換所有檔案
特色:用 chardet 偵測原檔案的編碼格式,再轉換成 UTF-8
python
"""Convert the files of one folder to UTF-8.

The encoding of every source file is guessed with chardet; the converted
copies are written to an ``output`` sub-folder of the input folder and the
original files are left untouched.
"""

import codecs
import os
from typing import Optional, Sequence

import chardet

# Set the folder path and file extensions
input_folder = r'D:\Data\temp'  # Specify your folder path here
extensions = []  # Specify the file extensions to convert, leave empty to convert all files
# Example: convert only files with the extension .cpp or .h
# extensions = ['.cpp', '.h']

# Codec used when chardet gives no usable answer.
_FALLBACK_ENCODING = 'utf-8'


def detect_encoding(file_path: str) -> Optional[str]:
    """Detect the encoding of the file.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of chardet's detection result. chardet
        reports ``None`` when it cannot make a guess (for example for an
        empty file), so callers must be prepared for that.
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read()
    return chardet.detect(raw_data)['encoding']


def _usable_codec(encoding: Optional[str]) -> str:
    """Return *encoding* if Python can decode it, else the UTF-8 fallback."""
    if not encoding:
        # chardet could not guess (empty or undetectable content).
        return _FALLBACK_ENCODING
    try:
        codecs.lookup(encoding)
    except LookupError:
        # chardet named an encoding that this Python build has no codec for.
        return _FALLBACK_ENCODING
    return encoding


def convert_to_utf8(input_folder: str, extensions: Sequence[str]) -> None:
    """Write UTF-8 copies of the files in *input_folder* to ``output``.

    Only regular files directly inside *input_folder* are processed;
    sub-folders (including the ``output`` folder itself) are skipped.

    Args:
        input_folder: Folder containing the files to convert.
        extensions: File-name suffixes to convert (e.g. ``['.cpp', '.h']``).
            An empty value converts every file.
    """
    # Create the output folder if it doesn't exist
    output_folder = os.path.join(input_folder, "output")
    os.makedirs(output_folder, exist_ok=True)

    # str.endswith accepts a tuple, so a single call tests every suffix.
    suffixes = tuple(extensions or ())

    for filename in os.listdir(input_folder):
        input_file_path = os.path.join(input_folder, filename)

        if not os.path.isfile(input_file_path):
            continue
        if suffixes and not filename.endswith(suffixes):
            continue

        encoding = _usable_codec(detect_encoding(input_file_path))

        # Work on bytes so line endings pass through unchanged, exactly as
        # they did with codecs.open (which never translates newlines).
        with open(input_file_path, 'rb') as source_file:
            raw_data = source_file.read()

        # Best effort: bytes that are invalid in the detected encoding are
        # dropped rather than aborting the whole batch.
        content = raw_data.decode(encoding, errors='ignore')

        output_file_path = os.path.join(output_folder, filename)
        with open(output_file_path, 'wb') as output_file:
            output_file.write(content.encode('utf-8'))

    print(f"All specified files have been converted and saved to {output_folder}")


if __name__ == "__main__":
    # Call the function
    convert_to_utf8(input_folder, extensions)
Python
- 作者:JK Yang
- 链接:https://jk1124.netlify.app/article/10bd03e5-8529-8075-b666-cc27fc4a6679
- 声明:本文采用 CC BY-NC-SA 4.0 许可协议,转载请注明出处。