Python 图片去重与归档脚本

  1. 解决问题
  2. 脚本内容

解决问题

对手机中图片与视频整理:

  1. 删除重复文件
  2. 根据拍摄时间(EXIF 信息;缺失时使用文件修改时间)重命名
  3. 根据文件名开头的年份归档到目标目录下相应年份的文件夹中

脚本内容

import random
from datetime import datetime
import hashlib
from pathlib2 import Path
import re
import exifread
from tqdm import tqdm


class Pic:
    """Deduplicate, rename and archive picture/video files by year.

    Files are identified by the MD5 digest of their content. A file whose
    digest has already been registered on this instance is deleted; a new
    file can be renamed after its shot time and moved into
    ``<target_dir>/<year>/``.
    """

    # Leading date in a file stem: 2020-11-30, 20201130 or 2020_11_30.
    # The separator class is ``[-_]``; a ``|`` inside a character class is a
    # literal pipe, not an alternation, so it must not appear here.
    _DATE_PREFIX = re.compile(r'^20[1-2][0-9][-_]?\d{2}[-_]?\d{2}')

    def __init__(self, source_dir, target_dir):
        """Remember the source and target directories.

        Args:
            source_dir: directory holding the files to process.
            target_dir: root directory under which year folders are created.
        """
        self.source_dir = source_dir
        self.target_dir = target_dir
        # Per-instance on purpose: a mutable class attribute would be shared
        # by every Pic object. A set gives constant-time membership tests.
        self.md5s = set()

    @staticmethod
    def _free_path(candidate: Path) -> Path:
        """Return ``candidate``, or a ``_N``-suffixed sibling if it is taken."""
        if not candidate.exists():
            return candidate
        counter = 1
        while True:
            alternative = candidate.with_name(
                candidate.stem + "_" + str(counter) + candidate.suffix
            )
            if not alternative.exists():
                return alternative
            counter += 1

    def _get_shot_time(self, picture: Path) -> str:
        """Return the shot time of ``picture`` formatted as ``YYYYmmdd_HHMMSS``.

        The EXIF ``DateTimeOriginal`` tag is used when it is present and
        parseable; otherwise the file's modification time is used.
        """
        with picture.open("rb") as f:
            img_exif = exifread.process_file(f)
        taken = None
        raw_value = img_exif.get('EXIF DateTimeOriginal')
        if raw_value:
            try:
                taken = datetime.strptime(str(raw_value), "%Y:%m:%d %H:%M:%S")
            except ValueError:
                # Malformed EXIF date: fall through to the mtime fallback
                # instead of aborting the whole run.
                taken = None
        if taken is None:
            taken = datetime.fromtimestamp(picture.stat().st_mtime)
        return taken.strftime("%Y%m%d_%H%M%S")

    def rename(self, picture: Path) -> Path:
        """Rename ``picture`` in place to ``<shot time>_S<random digits><suffix>``.

        Returns:
            The path of the renamed file.
        """
        shot_time = self._get_shot_time(picture)
        while True:
            token = "_S" + str(random.randint(1, 9)) + str(random.randint(100000, 999999))
            renamed = picture.with_name(shot_time + token + picture.suffix)
            # Draw again on a name clash so an existing file is never replaced.
            if not renamed.exists():
                break
        picture.rename(renamed)
        return renamed

    def _gen_md5(self, picture: Path) -> str:
        """Return the hexadecimal MD5 digest of the content of ``picture``."""
        digest = hashlib.md5()
        with picture.open("rb") as fp:
            # Read in chunks so large videos are never loaded whole.
            for chunk in iter(lambda: fp.read(65536), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def add_md5s(self, picture: Path) -> bool:
        """Register the digest of ``picture``, deleting the file if it is a duplicate.

        Returns:
            True if the content was new and its digest was recorded;
            False if the digest was already known, in which case ``picture``
            has been deleted from disk.
        """
        md5_info = self._gen_md5(picture)
        if md5_info in self.md5s:
            picture.unlink()
            return False
        self.md5s.add(md5_info)
        return True

    def exist_date(self, picture: Path) -> bool:
        """Return True if the file stem starts with a date.

        Accepted forms: 2020-11-30, 20201130 or 2020_11_30.
        """
        return bool(self._DATE_PREFIX.match(picture.stem))

    def tag(self, picture: Path) -> Path:
        """Move ``picture`` into ``<target_dir>/<year>/``.

        The year is taken from the first four characters of the file name,
        so the name is expected to start with a date.

        Returns:
            The new location of the file. If a file with the same name is
            already in the year folder, a ``_N`` suffix is added rather than
            replacing it.
        """
        year = picture.name[:4]
        year_dir = Path(self.target_dir, year).resolve()
        year_dir.mkdir(parents=True, exist_ok=True)
        destination = self._free_path(year_dir.joinpath(picture.name))
        picture.rename(destination)
        return destination


if __name__ == '__main__':
    # Script entry point: deduplicate the source folder against the target
    # archive, then rename and file each remaining picture by year.
    source_root = r"C:\图片1"
    target_root = r"C:\图片2"
    source_path = Path(source_root)
    target_path = Path(target_root)
    archiver = Pic(source_root, target_root)

    # Register every file already in the target tree so that source files
    # with the same content are recognised as duplicates. add_md5s also
    # deletes a target file whose content was already registered.
    for existing in target_path.glob('**/*'):
        if not existing.is_file():
            continue
        archiver.add_md5s(existing)

    # Snapshot the regular files directly inside the source folder
    # (sub-folders are not descended into) before any of them is moved.
    pending = list(filter(lambda entry: entry.is_file(), source_path.iterdir()))
    for current in tqdm(pending):
        # Duplicate content: add_md5s has already deleted the file.
        if not archiver.add_md5s(current):
            continue
        # Names without a leading date get a shot-time based name first.
        if not archiver.exist_date(current):
            current = archiver.rename(current)
        # Move the file into its year folder.
        archiver.tag(current)

转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。
My Show My Code