ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

获取所有图片的MD5值,并根据MD5值去重整合

2022-07-31 09:01:52  阅读:130  来源: 互联网

标签:list 值去 same dst2 获取 file MD5 os md5


# filedeal.py
#!/usr/bin/env python
# -*- coding:utf-8 -*-

import os
import shutil
from PIL import Image
import io
import requests
import datetime
import hashlib
import time

# 获取所有文件
def getAllFiles(fire_dir):
    """Recursively collect the path of every file below ``fire_dir``.

    Each path is echoed to stdout as it is found, and the complete list is
    echoed once more at the end.

    Args:
        fire_dir: Directory to walk. A directory that cannot be listed
            (for example a missing one) simply yields an empty list,
            because ``os.walk`` ignores listing errors by default.

    Returns:
        list[str]: One path per file, in ``os.walk`` traversal order, each
        made of the directory reported by ``os.walk`` joined with the name.
    """
    filepath_list = []
    for root, _folder_names, file_names in os.walk(fire_dir):
        for file_name in file_names:
            # os.path.join avoids the doubled separator that plain string
            # concatenation produced when ``root`` already ends with one
            # (a drive root, or a directory passed with a trailing slash).
            file_path = os.path.join(root, file_name)
            filepath_list.append(file_path)
            print(file_path)
    print(filepath_list)
    return filepath_list



# 获取图片的像素
def getPicsize(pic_file):
    """Return the pixel dimensions and the on-disk size of an image.

    The three values are also printed to stdout (size, width, height).

    Args:
        pic_file: Path of the image file.

    Returns:
        tuple: ``(width, height, size_in_bytes)``. Width and height are read
        through Pillow; the size comes from ``os.path.getsize``.

    Raises:
        OSError: If the file is missing or unreadable. Pillow's "cannot
            identify image file" error is expected to be an ``OSError``
            subclass as well -- confirm against the installed version.
    """
    # Close the image as soon as its header has been read: Image.open keeps
    # the underlying file open, and a lingering handle can block a later
    # move or rename of the same file (notably on Windows).
    with Image.open(pic_file) as img:
        w = img.width
        h = img.height

    image_size = os.path.getsize(pic_file)
    print(image_size)
    print(w)
    print(h)
    return w, h, image_size

def getBaseName(file_name):
    """Return the final component (the file name) of the path ``file_name``."""
    return os.path.basename(file_name)

def getNewName(old_file_name):
    """Build a sibling path whose file name is prefixed with a timestamp.

    The prefix is the current local time formatted as
    ``%Y%m%d%H%M%S%f`` followed by an underscore, so
    ``dir/photo.jpg`` becomes ``dir/<timestamp>_photo.jpg``. Nothing is
    renamed on disk; the new path is printed and returned.

    Args:
        old_file_name: Existing path (with or without a directory part).

    Returns:
        str: The path with the timestamp-prefixed file name.
    """
    # Split off the last component explicitly. Splitting the whole path on
    # the base name (as done previously) cut at its *first* occurrence, so a
    # path such as "data/img/img" lost part of its directory.
    directory, file_base_name = os.path.split(old_file_name)
    timestrhaomiao = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f_')
    new_file_name = os.path.join(directory, timestrhaomiao + file_base_name)
    print(new_file_name)
    return new_file_name

#获取文件md5值
def getmd5(file):
    """Return the hexadecimal MD5 digest of a file's contents.

    Args:
        file: Path of the file to hash.

    Returns:
        str | None: The 32-character hex digest, or ``None`` when ``file``
        is not an existing regular file (directory, missing path, ...).
    """
    if not os.path.isfile(file):
        return None
    md5 = hashlib.md5()
    # The context manager closes the handle even if a read fails, and the
    # chunked loop keeps memory use flat for large files such as videos.
    with open(file, 'rb') as fd:
        for chunk in iter(lambda: fd.read(1024 * 1024), b''):
            md5.update(chunk)
    return md5.hexdigest()


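# Hash every file under a folder, pair each duplicate with the first file that
# has the same MD5 digest, write the pairs to youchong.txt and move the
# duplicates into a "相同" sub-folder (no sorting is performed).
# src is the folder to scan.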
def _unique_destination(dst_dir, file_name):
    """Return a path for ``file_name`` inside ``dst_dir`` that is not taken yet."""
    candidate = os.path.join(dst_dir, file_name)
    stem, ext = os.path.splitext(file_name)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(dst_dir, "%s_%d%s" % (stem, counter, ext))
        counter += 1
    return candidate


def paixuMd5(src):
    """Find files with identical content under ``src`` and set the copies aside.

    Every file returned by ``getAllFiles(src)`` is hashed with ``getmd5``.
    The first file seen for a digest is kept in place; every later file with
    the same digest counts as a duplicate. Side effects:

    * progress and the duplicate report are printed to stdout;
    * ``youchong.txt`` is (over)written in the current working directory with
      one ``[md5, first_file, duplicate_file]`` list per line;
    * a ``相同`` folder is created inside ``src`` and every duplicate is moved
      into it.

    Args:
        src: Folder to scan recursively.

    Returns:
        list[list]: ``[md5, path]`` for every scanned file, in scan order
        (``md5`` is ``None`` for entries ``getmd5`` could not hash).

    Raises:
        OSError: If the ``相同`` folder cannot be created (e.g. ``src`` does
            not exist) or a move fails.
    """
    all_file_with_md5_list = []
    all_file_with_same_md5_list = []
    # digest -> first path seen with it; a dict lookup replaces the former
    # list scans, which made the whole pass quadratic in the file count.
    first_file_by_md5 = {}

    for f in getAllFiles(src):
        f_md5 = getmd5(f)
        one_f_md5_list = [f_md5, f]
        all_file_with_md5_list.append(one_f_md5_list)
        print("-----------------")
        if f_md5 is None:
            # Not a regular file (e.g. a broken link): there is no content
            # hash, so it must never be reported or moved as a duplicate.
            continue
        if f_md5 in first_file_by_md5:
            all_file_with_same_md5_list.append(one_f_md5_list)
        else:
            first_file_by_md5[f_md5] = f

    # Pair every duplicate with the file that was kept for its digest.
    all_with_many_file_list = []
    for one_same_f_md5, one_same_f in all_file_with_same_md5_list:
        all_with_many_file_list.append(
            [one_same_f_md5, first_file_by_md5[one_same_f_md5], one_same_f])
        print("........")

    print("all_with_many_file_list:")
    print(all_with_many_file_list)
    print(len(all_with_many_file_list))

    with open("youchong.txt", 'w', encoding="utf-8") as report:
        for one_list in all_with_many_file_list:
            print(one_list)
            report.write(str(one_list) + "\n")

    # Collect the duplicates in one folder below src. os.path.join yields the
    # same "<src>\相同" path as before on Windows and a real sub-folder
    # elsewhere.
    same_dir = os.path.join(src, "相同")
    if not os.path.isdir(same_dir):
        os.mkdir(same_dir)
    same_dir_key = os.path.normcase(os.path.abspath(same_dir))

    for _f_md5, one_f in all_file_with_same_md5_list:
        parent_key = os.path.normcase(
            os.path.abspath(os.path.dirname(one_f)))
        if parent_key == same_dir_key:
            # Already collected by an earlier run; moving it onto itself
            # would make shutil.move raise.
            continue
        # Two duplicates from different folders may share a file name; pick
        # a free name instead of letting shutil.move abort the whole run.
        target = _unique_destination(same_dir, os.path.basename(one_f))
        shutil.move(one_f, target)

    return all_file_with_md5_list












#对文件重命名并且分类
def _move_image_by_shape(pic_path, dst2):
    """Move one image into the orientation/size sub-folder of ``dst2``."""
    w, h, image_size = getPicsize(pic_file=pic_path)
    # Wider than tall counts as landscape; square images go with portrait.
    orientation = "横图" if w > h else "竖图"
    # 1024000 bytes is the cut-off the folder names call "1M".
    size_bucket = "小于1M" if image_size < 1024000 else "大于等于1M"
    shutil.move(pic_path, os.path.join(dst2, orientation, size_bucket))


def deal_file(src, dst1, dst2, dst3):
    """Rename every file under ``src`` and sort the files into target folders.

    Steps:

    1. Every file found by ``getAllFiles(src)`` is renamed to the name
       produced by ``getNewName`` (timestamp prefix).
    2. The tree is listed again and each file is classified by suffix
       (compared case-insensitively, so ``.JPG`` is handled like ``.jpg``).
    3. ``.mp4`` files are moved to ``dst1``; ``.jpg``/``.png``/``.jpeg``
       files are moved to ``dst2/<横图|竖图>/<大于等于1M|小于1M>`` according
       to ``getPicsize``; everything else is moved to ``dst3``.

    A failure while handling a single file is printed and skipped so the
    remaining files are still processed.

    Args:
        src: Folder whose files are renamed and sorted (walked recursively).
        dst1: Target folder for videos.
        dst2: Root target folder for images.
        dst3: Target folder for all other files.

    Raises:
        OSError: If a rename in step 1 fails or a target folder cannot be
            created.
    """
    mp4 = []
    images = []
    qita = []

    # Give every file a unique, timestamp-prefixed name first.
    for old_file_name in getAllFiles(src):
        os.rename(old_file_name, getNewName(old_file_name))

    # List again (the names changed) and classify by suffix.
    for f in getAllFiles(src):
        print(f)
        lowered = f.lower()
        if lowered.endswith('.mp4'):
            mp4.append(f)
        elif lowered.endswith(('.jpg', '.png', '.jpeg')):
            images.append(f)
        else:
            qita.append(f)

    # Create the target folders, including the image sub-folders.
    os.makedirs(dst1, exist_ok=True)
    for orientation in ("横图", "竖图"):
        for size_bucket in ("大于等于1M", "小于1M"):
            os.makedirs(os.path.join(dst2, orientation, size_bucket),
                        exist_ok=True)
    os.makedirs(dst3, exist_ok=True)

    # Paths from getAllFiles already start with ``src``; joining ``src`` onto
    # them again (as before) produced non-existent paths for a relative src.
    for m in mp4:
        try:
            shutil.move(m, dst1)
        except Exception as e:  # best effort: report and continue
            print("%s: %s" % (m, e))

    for pic in images:
        try:
            _move_image_by_shape(pic, dst2)
        except Exception as e:  # unreadable image, name clash, ...
            print("%s: %s" % (pic, e))

    for q in qita:
        try:
            shutil.move(q, dst3)
        except Exception as e:  # best effort: report and continue
            print("%s: %s" % (q, e))

if __name__ == "__main__":
    # Folder scanned for duplicate files when the script is run directly;
    # adjust it to the local directory layout before running.
    file = r"F:\存储盘\其他\people\image"
    paixuMd5(file)

 

标签:list,值去,same,dst2,获取,file,MD5,os,md5
来源: https://www.cnblogs.com/jingzaixin/p/16536399.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有