ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

Python 将文件批量转换编码

2022-06-17 13:05:31  阅读:169  来源: 互联网

标签:编码 file name 批量 python list filename fileencode print


文件批量转换编码。原始基础代码来自互联网，在其基础上做了一些修改和适配，在此记录。实际只用到了 code2code 这个方法，其他函数没有用到，也就没有改动。

# -*- coding:utf-8 -*-
# @Time : 2022/3/22 20:04
# @Author: zhcode
# @File : convert_file_encode.py
import chardet
import codecs
import os
import sys
import time
 
# Absolute path of the working directory at import time; used as the default
# folder to scan when no directory is supplied.
CURRENT_PATH = os.path.abspath(os.curdir)
 
def _canonical_codec(name):
    """Return the canonical codec name for ``name``, or ``name`` itself if it is not a known codec."""
    try:
        return codecs.lookup(name).name
    except (LookupError, TypeError):
        return name


def code2code(filename, encode_out):
    """
    Re-encode one file in place from its detected encoding to ``encode_out``.

    The source encoding is guessed by ``chardet`` from the file's raw bytes.
    The file is only rewritten after its content has been successfully
    decoded and re-encoded in memory, so a failed conversion leaves the
    original file untouched.

    :param filename: path of the file to convert; it is overwritten in place
    :param encode_out: name of the target encoding, e.g. ``"utf-8"``
    :return: ``(basename, detected_encoding)`` when the file was rewritten,
             ``None`` when it was skipped (already in the target encoding,
             or the encoding could not be detected)
    :raises UnicodeError: if the content cannot be decoded with the detected
                          encoding or cannot be represented in ``encode_out``
    :raises LookupError: if the detected or target encoding name is unknown
    """
    print("transfer filename " + filename + " ... ")
    # Read the bytes once inside a context manager so the handle is closed.
    with open(filename, "rb") as fi:
        raw = fi.read()
    fileencode = chardet.detect(raw)['encoding']

    # Compare canonical codec names so that e.g. "GB2312" and "gb2312" are
    # recognised as the same encoding and the file is not rewritten needlessly.
    if _canonical_codec(fileencode) == _canonical_codec(encode_out):
        print("encode is no need to transfer...")
        return None

    if not fileencode:
        # Detection failed; report it and leave the file alone.
        print(fileencode)
        return None
    print("transfer detect codetype = " + fileencode)

    data = raw.decode(fileencode)
    if encode_out is None:
        # No target encoding given: keep the original codecs.open write path.
        with codecs.open(filename, mode='w', encoding=None) as fo:
            fo.write(data)
    else:
        # Encode before opening for writing: opening truncates the file, so
        # an encoding error after that point would destroy its content.
        payload = data.encode(encode_out)
        with open(filename, "wb") as fo:
            fo.write(payload)
    return os.path.basename(filename), fileencode
 
 
def main():
    """
    Command-line entry point.

    Arguments are read from ``sys.argv`` as ``<option> <value>`` pairs:
    ``-f <type>`` selects the target encoding (default ``gb2312``),
    ``-d <dirname>`` selects the folder to convert and ``-h`` prints the
    usage text without converting anything. With no arguments the current
    folder is converted.

    Any error is printed and the process exits with status 1; Ctrl-C prints
    a farewell message instead.

    :return: None
    """
    default_encoding = "gb2312"
    try:
        argv = sys.argv[1:]
        if not argv:
            convert(f=default_encoding)
            return

        # Options sit at even positions, their values at the odd ones.
        keys = argv[0::2]
        values = argv[1::2]

        if '-h' in keys:
            # Asking for help must not also start a conversion.
            helper()
            return
        if len(values) < len(keys):
            raise ValueError("option {} requires a value".format(keys[-1]))

        encode_out = default_encoding
        dir_path = None
        if '-f' in keys:
            encode_out = values[keys.index('-f')]
        if '-d' in keys:
            dir_path = values[keys.index('-d')]
        convert(f=encode_out, d=dir_path)

        print('Finish 转换完毕')

    except KeyboardInterrupt:
        print("\ngoodbye.")
    except Exception as ex:
        # Top-level boundary: report the problem and signal failure.
        print(ex)
        sys.exit(1)
 
 
def time_format(time_diff):
    """
    Render a duration given in seconds as ``hours:minutes:seconds``.

    Each field below 10 is prefixed with a single ``0``; larger fields are
    printed as they are.

    :param time_diff: elapsed time in seconds
    :return: the formatted string, e.g. ``"01:01:01"``
    """
    def pad(value):
        if value < 10:
            return "0{}".format(value)
        return value

    hours = int(time_diff / 3600)
    minutes = int((time_diff % 3600) / 60)
    seconds = int(time_diff % 60)
    return ":".join(str(pad(part)) for part in (hours, minutes, seconds))
 
 
def traverse_dir(file_dir):
    """
    Walk a folder recursively and collect the files eligible for conversion.

    Files whose extension (compared case-insensitively) is one of the
    excluded types are left out. A dot-file with no further extension,
    such as ``.project``, is matched by its whole name.

    :param file_dir: folder to traverse
    :return: list of file paths, each joined onto the folder it was found in;
             empty if the folder cannot be read
    """
    skipped = (".py", ".class", ".gif", ".png", ".jpg", ".project")

    def report(_error):
        # os.walk ignores unreadable folders unless an onerror hook is given.
        print("文件路径不正确!")

    file_path_list = []
    try:
        for root, _dirs, files in os.walk(file_dir, onerror=report):
            for name in files:
                stem, ext = os.path.splitext(name)
                # splitext(".project") yields ("​.project", "")-style results
                # with an empty extension, so fall back to the stem there.
                if (ext or stem).lower() not in skipped:
                    file_path_list.append(os.path.join(root, name))
    except (OSError, TypeError, ValueError):
        print("文件路径不正确!")

    return file_path_list
 
 
def convert(f=None, d=None):
    """
    Convert every eligible file under a folder to the target encoding.

    After each file a progress bar is printed together with the elapsed
    time, the file name and the encoding that was detected for it.

    :param f: target encoding name; ``gb2312`` is used when omitted
    :param d: folder to convert; the current folder is used when omitted
    :return: None
    :raises Exception: if ``d`` is given but is not a folder
    """
    # The usage text documents gb2312 as the default target encoding.
    encode_out = f if f else "gb2312"
    if not d:
        dir_path = CURRENT_PATH
    elif os.path.isdir(d):
        dir_path = d
    else:
        raise Exception("该路径不是一个文件夹.")

    start_time = time.time()
    file_path_list = traverse_dir(dir_path)
    total = len(file_path_list)
    for index, path in enumerate(file_path_list, 1):
        time_eat = time_format(int(time.time() - start_time))

        # code2code returns None for files it skipped, so the result cannot
        # be unpacked unconditionally.
        result = code2code(path, encode_out)
        if result:
            file_name, file_encode = result
        else:
            file_name, file_encode = os.path.basename(path), "skipped"

        extra = "{} {} {}".format(time_eat, file_name, file_encode)
        progressbar(index, total, 50, extra)
 
 
def progressbar(curr, total, duration=10, extra=''):
    """
    Print a one-line text progress bar.

    The bar is ``duration`` characters wide: ``#`` for the finished share
    and spaces for the rest, followed by ``[0~N]`` where ``N`` is the
    finished share scaled to ``duration``.

    :param curr: number of items finished so far
    :param total: total number of items; a non-positive total draws an
                  empty bar
    :param duration: width of the bar in characters
    :param extra: optional text appended after the bar
    :return: None
    """
    # float() keeps the division exact on interpreters where / truncates ints.
    frac = float(curr) / total if total > 0 else 0.0
    filled = int(round(frac * duration))
    # Keep the bar inside its width even if curr is out of range.
    filled = max(0, min(duration, filled))

    bar = '#' * filled + ' ' * (duration - filled)
    line = '\r' + bar + '[0~{0:.0f}]'.format(frac * duration)
    if extra:
        line += ' ' + extra
    print(line)
    sys.stdout.flush()
 
 
def helper():
    """
    Print the command-line usage text.

    The script name shown in each usage line is taken from ``sys.argv[0]``
    and displayed as ``./<file name>``.

    :return: None
    """
    app_name = "./{}".format(os.path.basename(sys.argv[0]))
    usage_lines = (
        '{app_name}                  # 将当前文件夹下文件格式转换为gb2312格式.',
        '{app_name} -f <type>        # 设置转换的编码格式',
        '{app_name} -d <dirname>     # 设置转换文件的路径',
        '{app_name} -h               # 帮助',
    )
    print("====== Image format conversion ======")
    for template in usage_lines:
        # Substitute the real script name for the placeholder.
        print(template.format(app_name=app_name))
 
 
if __name__ == '__main__':
    # main() is left disabled here; running the script converts every
    # eligible file under CURRENT_PATH to UTF-8 and prints each result.
    file_name_list = traverse_dir(CURRENT_PATH)

    print("file_name_list = ",len(file_name_list))
    index = 0
    for index, name in enumerate(file_name_list, 1):
        print(code2code(name, "utf-8"))
 

 

标签:编码,file,name,批量,python,list,filename,fileencode,print
来源: https://www.cnblogs.com/lesten/p/16385354.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有