backup:更实用的大文件搜索

findbigfile.py

#!/usr/bin/env python
import sys, os

# Files larger than this many bytes (100 MiB) are reported as big files.
BIG_FILE_SIZE = 1024 * 1024 * 100

# Directories whose own size (as reported for the directory entry itself)
# exceeds 1024 memory pages are reported and not descended into.
# os.sysconf reads the page size directly instead of spawning `getconf`.
BIG_DIR_SIZE = os.sysconf('SC_PAGE_SIZE') * 1024

# Paths pruned from the search. They are normalized so that they compare
# equal to the normalized paths produced while walking the tree.
EXCLUDES = [os.path.normpath(p) for p in ['/home/cdn/wpt_data']]

# Work list of paths still to examine. It is seeded with the search root
# taken from the first command-line argument; when no argument is given
# the current directory is searched instead of failing with IndexError.
stack = [os.path.normpath(sys.argv[1] if len(sys.argv) > 1 else '.')]

def format_size(s):
    """Return a human-readable string for a size given in bytes.

    The size is expressed in the largest unit among Bytes, KB, MB and GB
    whose threshold it reaches, using 1024 as the step between units.
    The value is truncated to a whole number of that unit, not rounded.
    Sizes of 1 GB and above are always reported in GB.
    """
    if s < 1024:
        return '%d Bytes' % s
    if s < 1024 * 1024:
        # Floor division keeps the arithmetic integral on Python 2 and 3.
        return '%d KB' % (s // 1024)
    if s < 1024 * 1024 * 1024:
        return '%d MB' % (s // (1024 * 1024))
    return '%d GB' % (s // (1024 * 1024 * 1024))

# Iterative depth-first walk: `stack` holds the paths still to be examined,
# so arbitrarily deep trees are handled without recursion.
while stack:
    check_file = stack.pop()

    # Excluded paths are dropped before being listed, which also prunes
    # everything beneath them.
    if check_file in EXCLUDES:
        continue

    # Skip symbolic links: they are neither measured nor followed.
    if os.path.islink(check_file):
        continue

    if os.path.isfile(check_file):
        try:
            s = os.path.getsize(check_file)
        except OSError:
            # The file vanished or became unreadable since it was listed;
            # keep scanning instead of aborting the whole run.
            continue
        if s > BIG_FILE_SIZE:
            # Single parenthesised argument: prints the same text on
            # Python 2 and Python 3.
            print('BIG FILE:%s\t%s' % (check_file, format_size(s)))
        continue

    if os.path.isdir(check_file):
        try:
            # Size reported for the directory itself, not the sum of the
            # files stored inside it.
            s = os.path.getsize(check_file)
        except OSError:
            continue

        # Report oversized directories and do not descend into them.
        if s > BIG_DIR_SIZE:
            print('BIG DIR:%s\t%s' % (check_file, format_size(s)))
            continue

        try:
            entries = os.listdir(check_file)
        except OSError:
            # Best effort: unreadable (e.g. permission denied) or removed
            # directories are skipped silently.
            continue

        for f in entries:
            stack.append(os.path.join(check_file, f))

加入讨论

电子邮件地址不会被公开。 必填项已用*标注

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据