查找硬盘上的重复文件

软件和网站开发以及相关技术探讨
回复
头像
oneleaf
论坛管理员
帖子: 10441
注册时间: 2005-03-27 0:06
系统: Ubuntu 12.04

查找硬盘上的重复文件

#1

帖子 oneleaf » 2009-01-17 21:26

代码: 全选

#!/usr/bin/env python
#coding=utf-8
import binascii,os

# Maps a file size in bytes to the list of paths that have exactly that size.
# Filled in by filesize().
filesizes = {}
# Groups (lists of paths) whose members share the same CRC-32.
# Filled in by filecrc().
samefiles = []


def filesize(path):
    """Record the size of ``path``, or of every file beneath it, in ``filesizes``.

    If ``path`` is a directory it is walked recursively; otherwise its size
    is looked up and ``path`` is appended to ``filesizes[size]``.

    Symbolic links to directories found while walking are not followed,
    because os.path.isdir() follows links and a link that points back at an
    ancestor directory would make the recursion never terminate.  Entries
    that cannot be listed or stat'ed (unreadable directory, dangling link,
    file removed during the scan) are skipped instead of aborting the scan.

    Args:
        path: File or directory path to scan.

    Returns:
        None.  Results are accumulated in the module-level ``filesizes``.
    """
    if os.path.isdir(path):
        try:
            entries = os.listdir(path)
        except OSError:
            # Directory exists but cannot be read; nothing to record.
            return
        for entry in entries:
            child = os.path.join(path, entry)
            if os.path.islink(child) and os.path.isdir(child):
                # Do not descend through directory symlinks (cycle guard).
                continue
            filesize(child)
        return

    try:
        size = os.path.getsize(path)
    except OSError:
        # Dangling symlink, vanished file, or permission problem.
        return
    # setdefault replaces the has_key() check, which Python 3 removed.
    filesizes.setdefault(size, []).append(path)

def filecrc(files):
    """Group ``files`` by CRC-32 and record groups of two or more in ``samefiles``.

    Each file is opened in binary mode and checksummed in fixed-size chunks,
    so the checksum is computed over the raw bytes and large files are never
    held in memory in full.  Files that cannot be opened or read are skipped.

    Note that equal CRC-32 values make two files very likely, but not
    certainly, identical; a byte-for-byte comparison would be needed for
    certainty.

    Args:
        files: Iterable of file paths to compare (the caller passes paths
            that already share the same size).

    Returns:
        None.  Every list of paths sharing a CRC-32 value, when it holds more
        than one path, is appended to the module-level ``samefiles``.
    """
    chunk_size = 1024 * 1024
    filecrcs = {}
    for path in files:
        crc = 0
        try:
            # 'rb': crc32 needs bytes, and text mode may alter the data.
            with open(path, 'rb') as f:
                while True:
                    chunk = f.read(chunk_size)
                    if not chunk:
                        break
                    # Feeding the running value back in yields the same
                    # result as one crc32 call over the whole content.
                    crc = binascii.crc32(chunk, crc)
        except (IOError, OSError):
            # Unreadable file: it cannot be compared, so leave it out.
            continue
        filecrcs.setdefault(crc, []).append(path)
    for group in filecrcs.values():
        if len(group) > 1:
            samefiles.append(group)
            
if __name__ == "__main__":
    import sys

    # Scan the directory named by the first command-line argument; with no
    # argument, fall back to the original hard-coded /home/ubuntu.
    root = sys.argv[1] if len(sys.argv) > 1 else "/home/ubuntu"

    # Pass 1: bucket every file under root by its size.
    filesize(root)
    # Pass 2: only files that share a size can be duplicates, so checksum
    # just those buckets.
    for sizesamefilelist in filesizes.values():
        if len(sizesamefilelist) > 1:
            filecrc(sizesamefilelist)
    # Report each group of files with matching size and CRC-32.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    for samefile in samefiles:
        print("******* same files group **********")
        for path in samefile:
            print(path)
回复