#!/usr/bin/env python import sys import math import os import re import time import stat import urllib import optparse import fpformat import unicodedata # -------------------------------------------------------------------------- class Options : def __init__ (self) : self.quick = False self.no_md5 = False self.run = "" self.first = [ ] self.second = [ ] self.foto = False self.video = False self.jpeg = False self.cr2 = False self.avi = False self.mpeg = False self.exclude = False self.large = False self.ext = False self.size = False self.sum = False self.only_old = False self.only_new = False self.show_equal = False self.show_dupl = False self.source = "" self.target = "" self.show_conflicts = False self.to_lower_case = False self.use_map = "" self.create_map = False self.copy = False self.xcopy = False self.cp = False self.mv = False self.rsync = False self.tar = False self.scp = False self.mkdir = False self.remove = False def ensure_value (self, name, value) : return getattr (self, name, value) def read_options () : parser = optparse.OptionParser () parser.add_option("-q", "--quick", dest="quick", action="store_true", help="print only summary") parser.add_option("-n", "--no-md5", dest="no_md5", action="store_true", help="do not compare md5 ") parser.add_option("-r", "--run", dest="run", help="method name") parser.add_option("-a", "--first", dest="first", action="append", help="first file group") parser.add_option("-b", "--second", dest="second", action="append", help="first file group") parser.add_option("-f", "--foto", dest="foto", action="store_true", help=".jpg, .jpeg and .cr2") parser.add_option("-v", "--video", dest="video", action="store_true", help=".avi, .mpg and .mpeg") parser.add_option("-j", "--jpg", "--jpeg", dest="jpeg", action="store_true", help=".jpg, .jpeg") parser.add_option("-c", "--cr2", dest="cr2", action="store_true", help=".cr2") parser.add_option("--avi", dest="avi", action="store_true", help=".avi") parser.add_option("--mpg", "--mpeg", 
dest="mpeg", action="store_true", help=".mpg, .mpeg") parser.add_option("-e", "--exclude", dest="exclude", action="store_true", help="exclude .info, .db, .thm and .scn") parser.add_option("-l", "--large", dest="large", action="store_true", help="only files >= 1MB") parser.add_option("--ext", dest="ext", action="store_true", help="extension summary") parser.add_option("--size", dest="size", action="store_true", help="size summary") parser.add_option("--sum", dest="sum", action="store_true", help="summary") parser.add_option("--old", dest="only_old", action="store_true", help="only old files") parser.add_option("--new", dest="only_new", action="store_true", help="only new files") parser.add_option("--equal", dest="show_equal", action="store_true", help="show equal files") parser.add_option("--dupl", dest="show_dupl", action="store_true", help="show duplicated files") parser.add_option("--from", "--source", dest="source", help="source directory") parser.add_option("--to", "--target", dest="target", help="target directory") parser.add_option("--conflicts", dest="show_conflicts", action="store_true", help="show name conflicts") parser.add_option("--lower", dest="to_lower_case", action="store_true", help="compare name case insensitive (need -n)") parser.add_option("--use-map", dest="use_map") parser.add_option("--create-map", dest="create_map", action="store_true") parser.add_option("--copy", dest="copy", action="store_true") parser.add_option("--xcopy", dest="xcopy", action="store_true") parser.add_option("--cp", dest="cp", action="store_true") parser.add_option("--mv", dest="mv", action="store_true") parser.add_option("--rsync", dest="rsync", action="store_true") parser.add_option("--tar", dest="tar", action="store_true") parser.add_option("--scp", dest="scp", action="store_true") parser.add_option("--mkdir", dest="mkdir", action="store_true") parser.add_option("--remove", dest="remove", action="store_true") global opt global arg opt = Options () (opt, arg) = 
parser.parse_args (values=opt) for a in arg : print "Unused argument", a # -------------------------------------------------------------------------- def decode_str (s) : return urllib.unquote (s) def decode_num (s) : if len (s) == 0 : return 0 else: return int (s) def decode_time (s) : return s # -------------------------------------------------------------------------- Normal = 0 Old = 1 New = 2 Modified = 3 Equal = 4 Duplicated = 5 class FileInfo : def __init__ (self) : self.kind = Normal self.name = "" self.size = 0 self.mtime = "" self.md5 = "" self.mode = Normal self.e_model = "" self.e_number = "" self.e_width = 0 self.e_height = 0 self.e_time = "" # -------------------------------------------------------------------------- def readLine (line) : "Read one line from scan-file, return FileInfo" result = FileInfo () items = line.split () inx = 1 for item in items : if item.startswith ("md5=") : result.md5 = item[4:] elif item.startswith ("name=") : result.name = decode_str (item[5:]) elif item.startswith ("size=") : result.size = decode_num (item[5:]) elif item.startswith ("mode=") : result.mode = item[5:] elif item.startswith ("uid=") : result.uid = decode_num (item[4:]) elif item.startswith ("gid=") : result.gid = decode_num (item[4:]) elif item.startswith ("atime=") : result.atime = decode_time (item[6:]) elif item.startswith ("ctime=") : result.ctime = decode_time (item[6:]) elif item.startswith ("mtime=") : result.mtime = decode_time (item[6:]) elif item.startswith ("e_model=") : result.e_model = decode_str (item[8:]) elif item.startswith ("e_number=") : result.e_number = decode_str (item[9:]) elif item.startswith ("e_width=") : result.e_width = decode_num (item[8:]) elif item.startswith ("e_height=") : result.e_height = decode_num (item[9:]) elif item.startswith ("e_time=") : result.e_time = decode_time (decode_str (item[7:])) elif inx == 1 : result.md5 = item inx = inx + 1 elif inx == 2 : result.name = decode_str (item) inx = inx + 1 elif inx == 3 : 
result.size = decode_num (item) inx = inx + 1 elif inx == 4 : result.mtime = decode_time (item) inx = inx + 1 return result def readFile (collection, fileName) : file = open (fileName, "r") for line in file : data = readLine (line) if data.name != "" and not bad_md5 (data.md5) : # and valid_md5 (data.md5) : collection.append (data) file.close () # -------------------------------------------------------------------------- def conv_time (t) : s = time.gmtime (t) return time.strftime ("%Y-%m-%d:%H:%M:%S", s) def scan_dir (collection, top_dir, rel_dir) : dir_name = os.path.join (top_dir, rel_dir) subitems = os.listdir (dir_name) subitems.sort () for loc_file_name in subitems : rel_file_name = os.path.join (rel_dir, loc_file_name) file_name = os.path.join (top_dir, rel_file_name) if os.path.isdir (file_name) : scan_dir (collection, top_dir, rel_file_name) else : info = os.stat (file_name); result = FileInfo () result.name = rel_file_name result.size = info [stat.ST_SIZE] result.mtime = decode_time (conv_time (info [stat.ST_MTIME])) collection.append (result) # -------------------------------------------------------------------------- def scan_lslR (collection, file_name) : file = open (file_name, "r") directory = "" for line in file : line = line.strip () if line == "" : pass elif line.endswith (":") : directory = line [0:-1] if directory == "." 
: directory = "" elif directory.startswith ("./") : directory = directory [2:] elif line.startswith ("total ") : pass else : items = line.split (None, 8) item_count = len (items) name = "" if item_count == 1 : name = items [0] # first column else : inx = 8 if item_count > 5 : mask = "[0-9][0-9]([0-9][0-9])?-[0-9]([0-9])?-[0-9]([0-9])?$" if re.search (mask, items [5]) : inx = 7 if inx < item_count : name = items [inx] i = inx + 1 while i < item_count : name = name + " " + items [i] # names with spaces i = i + 1 # print items, "inx=", inx, "name=", name if name.endswith ('/') or name.endswith ('*') : name = name [0:-1] is_dir = False if item_count >= 8: is_dir = items[0].startswith('d') if name != "" and not is_dir : result = FileInfo () result.name = os.path.join (directory, name) if item_count != 1 : result.size = decode_num (items [4]) result.mtime = decode_time (items[5] + ':' + items [6]) # items [0] ... mode # items [1] ... link count # items [2] ... uid # items [3] ... gid # items [4] ... size # items [5] ... date yyyy-mm-dd # items [6] ... time hh:ss # items [7] ... file name # items [5] ... month # items [6] ... day # items [7] ... time # items [8] ... file name # items [5] ... month # items [6] ... day # items [7] ... year # items [8] ... 
file name collection.append (result) file.close () # -------------------------------------------------------------------------- def valid_md5 (md5) : return md5 != "" and md5[0] != '-' and md5[0] != '!'; def bad_md5 (md5) : return md5 != "" and md5[0] == '!'; def lookup (a, b) : "Lookup files from a in b" cache = { } for f in a : if f.md5 in cache : f.mode = Duplicated else : f.mode = Old if valid_md5 (f.md5) : cache [f.md5] = f second_cache = { } for g in b : if g.md5 in cache : f = cache [g.md5] if f.mode == Old : f.mode = Equal if g.md5 in second_cache : g.mode = Duplicated else : g.mode = Equal if valid_md5 (g.md5) : second_cache [g.md5] = g else : if g.md5 in second_cache : g.mode = Duplicated else : g.mode = New if valid_md5 (g.md5) : second_cache [g.md5] = g # -------------------------------------------------------------------------- def get_lookup_key (f) : ident = os.path.basename (f.name) if opt.to_lower_case : ident = ident.lower () key = (ident, f.size) return key def lookup_by_name_and_size (a, b) : for f in a : f.ident = get_lookup_key (f) for f in b : f.ident = get_lookup_key (f) cache = { } for f in a : if f.ident in cache : f.mode = Duplicated else : cache [f.ident] = f f.mode = Old second_cache = { } for g in b : if g.ident in cache : f = cache [g.ident] if f.mode == Old : f.mode = Equal if g.ident in second_cache : g.mode = Duplicated else : second_cache [g.ident] = g g.mode = Equal else : if g.ident in second_cache : g.mode = Duplicated else : g.mode = New second_cache [g.ident] = g # -------------------------------------------------------------------------- def get_conflict_key (f) : ident = os.path.basename (f.name) # key = (ident) key = (ident, f.size) # key = (ident, f.size, f.mtime) return key def print_conflicts (a, b) : print cache = { } conflict_cache = { } conflict_list = [ ] scan_list = [ ] scan_again_list = [ ] for f in a : key = get_conflict_key (f) if not (key in cache) : cache [key] = f else : other = cache [key] if f.md5 != 
other.md5 : print "Conflict", key print " first file: ", other.name, other.md5 print " second file:", f.name, f.md5 conflict_list.append (f) conflict_cache [key] = True for f in b : key = get_conflict_key (f) if not (key in cache) or (key in conflict_cache) : if f.mode != New : print "scan again", f.name scan_again_list.append (f) scan_list.append (f) print summary (conflict_list, "Conflicts") summary (scan_list, "Scan") summary (scan_again_list, "Scan again") summary (b, "All") # -------------------------------------------------------------------------- def directory_name (s) : return os.path.dirname (s.replace ("\\", "/")) def print_map (a, b) : md5_cache = { } dir_cache = { } for g in b : if valid_md5 (g.md5) : md5_cache [g.md5] = g for f in a : if f.md5 in md5_cache : g = md5_cache [f.md5] s = directory_name (f.name) # source if not (s in dir_cache) : dir_cache [s] = { } # map to map p = dir_cache [s] # map t = directory_name (g.name) # target if not (t in p) : p[t] = 0 p[t] += 1 for s in sorted (dir_cache.keys ()) : p = dir_cache [s] # map cnt = 0 for t in sorted (p.keys ()) : cnt = cnt + 1 if cnt == 1 : print s, else : print " ", print "->", t, "(", p[t], ")" # -------------------------------------------------------------------------- def ascii_file_name (n) : text = unicode (n, "cp1250") result = u"" for c in text : if c < ' ' or c > '\x7f' : r = unicodedata.normalize ("NFD", c) [0] elif c == '*' or c == '?': r = '_' elif c == ' ' : r = ' ' else : r = c result = result + r return result def target_file_name (n) : n = ascii_file_name (n) return "\"" + n.replace ("/", "\\") + "\"" def linux_file_name (n) : return "\"" + n.replace ("\\", "/") + "\"" def win_file_name (n) : # n = unicode (n, "cp1250") return "\"" + n.replace ("/", "\\") + "\"" # -------------------------------------------------------------------------- def recursive_make_dir (cache, d) : if not (d in cache) : sub_dir = directory_name (d) if sub_dir != "" : recursive_mkdir (cache, sub_dir) cache 
[d] = 1 print "mkdir", target_file_name (os.path.join (opt.target, d)) def copy_commands (a, b) : if opt.mkdir : print "mkdir", target_file_name (opt.target) cache = { } for f in a : if f.mode == Old : d = directory_name (f.name) recursive_make_dir (cache, d) s = win_file_name (os.path.join (opt.source, f.name)) t = target_file_name (os.path.join (opt.target, d)) print "copy ", s, t if opt.remove : for f in b : if f.mode == New : t = target_file_name (os.path.join (opt.target, f.name)) print "del ", t # -------------------------------------------------------------------------- def recursive_mkdir (cache, d) : if not (d in cache) : sub_dir = directory_name (d) if sub_dir != "" : recursive_mkdir (cache, sub_dir) cache [d] = 1 s = target_file_name (os.path.join (opt.target, d)) print "test -d", s, "|| mkdir", s def cp_commands (a, b) : if opt.mkdir : print "mkdir", target_file_name (opt.target) cache = { } for f in a : if f.mode == Old : d = directory_name (f.name) recursive_mkdir (cache, d) s = linux_file_name (os.path.join (opt.source, f.name)) t = target_file_name (os.path.join (opt.target, f.name)) print "cp -n ", s, t if opt.remove : for f in b : if f.mode == New : t = target_file_name (os.path.join (opt.target, f.name)) print "rm ", t def mv_commands (a) : cache = { } for f in a : if f.mode == Old : d = directory_name (f.name) recursive_mkdir (cache, d) s = linux_file_name (os.path.join (opt.source, f.name)) t = target_file_name (os.path.join (opt.target, f.name)) print "mv -n ", s, t # -------------------------------------------------------------------------- def show_size (size) : return fpformat.fix (float (size) / 1000000, 1) + "MB" def fmt (num) : txt = str (num) n = len (txt) - 1 k = 0 while n > 0 : k = k + 1 if k == 3 : txt = txt [:n] + ' ' + txt [n:] k = 0 n = n - 1 return txt def print_summary (txt, cnt, sum): print txt.rjust (32), fmt (cnt).rjust (7), "files,", fmt (sum).rjust (17), "bytes" def summary (a, txt): cnt = 0 sum = 0 for f in a : cnt += 1 
sum += f.size print_summary (txt, cnt, sum) def summary2 (a, txt, m): cnt = 0 sum = 0 for f in a : if f.mode == m : cnt += 1 sum += f.size print_summary (txt, cnt, sum) # -------------------------------------------------------------------------- def extension_summary (a, txt, m): cnt = { } sum = { } for f in a : if f.mode == m : base, ext = os.path.splitext (f.name) ext = ext.lower () if not (ext in cnt) : cnt [ext] = 0 sum [ext] = 0 cnt [ext] += 1 sum [ext] += f.size print for ext in sorted (cnt.keys ()) : t = ext if t == "" : t = "no extension" print_summary (t + " (" + txt + ")", cnt[ext], sum[ext]) # -------------------------------------------------------------------------- def length_summary (a): cnt = { } for f in a : if not (f.size in cnt) : cnt [f.size] = 0 cnt [f.size] += 1 print for grp in sorted (cnt.keys ()) : if cnt [grp] > 1 : print cnt[grp], "files", grp, "bytes" # -------------------------------------------------------------------------- def size_summary (a, txt, m): cnt = { } sum = { } for f in a : if f.mode == m : if f.size < 1000 * 1000 : n = 100 * 1000 elif f.size < 10 * 1000 * 1000 : n = 1000 * 1000 elif f.size < 100 * 1000 * 1000 : n = 10 * 1000 * 1000 else : n = 100 * 1000 * 1000 grp = (f.size / n + 1) * n if not (grp in cnt) : cnt [grp] = 0 sum [grp] = 0 cnt [grp] += 1 sum [grp] += f.size print for grp in sorted (cnt.keys ()) : if grp < 1000 * 1000 : t = "<= " + str (grp/1000) + " KB" else : t = "<= " + str (grp/(1000*1000)) + " MB" print_summary (t + " (" + txt + ")", cnt[grp], sum[grp]) # -------------------------------------------------------------------------- def ignore_name (name) : # return False return ( name.startswith ("System Volume Information\\") or name.startswith ("Recycled\\") ) # -------------------------------------------------------------------------- def with_suffix (name, suffixes) : result = False for e in suffixes : if name.endswith (e) : result = True return result def with_mask (name, mask) : return re.search (mask, 
name) != None def compare_by_name (x, y) : return cmp (x.name, y.name) # -------------------------------------------------------------------------- def listing (aa, bb): if opt.show_dupl : for f in aa : if f.mode == Duplicated : print "Duplicated file in old directory ", f.name if not opt.only_new : for f in aa : if f.mode == Old : print "File only in old directory ", f.name, show_size (f.size) if not opt.only_old : for f in bb : if f.mode == New : print "File only in new directory ", f.name, show_size (f.size) if opt.show_dupl : for f in bb : if f.mode == Duplicated : print "Duplicated file in new directory ", f.name if 0 : for f in aa : if f.mode == Equal : print "Equal old file ", f.name if opt.show_equal : for f in bb : if f.mode == Equal : print "Equal new file ", f.name # -------------------------------------------------------------------------- def print_lookup (a, b, exclude = [ ], include = [ ], mask = "", min_size = 0) : use_include = False for f in include: use_include = True use_mask = (mask != "") aa = [ ] for f in a : if not ignore_name (f.name) : if not with_suffix (f.name, exclude) : if not use_include or with_suffix (f.name, include) : if not use_mask or with_mask (f.name, mask) : if f.size >= min_size : aa.append (f) bb = [ ] for f in b : if not ignore_name (f.name) : if not with_suffix (f.name, exclude) : if not use_include or with_suffix (f.name, include) : if not use_mask or with_mask (f.name, mask) : if f.size >= min_size : bb.append (f) aa.sort (compare_by_name) bb.sort (compare_by_name) cmds = opt.copy or opt.xcopy or opt.cp or opt.mv or opt.rsync or opt.tar or opt.scp if not opt.quick and not cmds and not opt.show_conflicts and not opt.create_map : listing (aa, bb) if opt.size : size_summary (aa, "old", Old) size_summary (bb, "new", New) size_summary (aa, "equal", Equal) if opt.ext : extension_summary (aa, "old", Old) extension_summary (bb, "new", New) extension_summary (aa, "equal", Equal) if opt.sum or not cmds : print summary (a, "Old 
files:") summary (aa, "Selected old files:") summary (b, "New files:") summary (bb, "Selected new files:") print summary2 (aa, "Duplications in old directory:", Duplicated) summary2 (bb, "Duplications in new directory:", Duplicated) summary2 (aa, "Only in old directory:", Old) summary2 (bb, "Only in new directory:", New) summary2 (aa, "Equal:", Equal) summary2 (bb, "Equal:", Equal) if opt.show_conflicts : print_conflicts (aa, bb) if opt.create_map : print_map (aa, bb) if opt.copy : copy_commands (aa, bb) if opt.cp : cp_commands (aa, bb) if opt.mv : mv_commands (aa) # -------------------------------------------------------------------------- class Command: def __init__ (self) : self.first_set = [ ] self.second_set = [ ] self.current_set = self.first_set self.include_set = [ ] self.exclude_set = [ ] self.mask_set = "" self.min_size = 0 global cmd cmd = Command () def only (ext): for e in ext: cmd.include_set.append (e) cmd.include_set.append (e.upper ()) def exclude (ext): for e in ext: cmd.exclude_set.append (e) cmd.exclude_set.append (e.upper ()) def mask (m): cmd.mask_set = m def minimal_size (n): cmd.min_size = n def read (name): readFile (cmd.current_set, name) def read_dir (name): scan_dir (cmd.current_set, name, "") def read_lslR (name): scan_lslR (cmd.current_set, name) def switch (): cmd.current_set = cmd.second_set def compare (): if opt.no_md5 : lookup_by_name_and_size (cmd.first_set, cmd.second_set) # without md5 sums else : lookup (cmd.first_set, cmd.second_set) # with md5 sums print_lookup (cmd.first_set, cmd.second_set, cmd.exclude_set, cmd.include_set, cmd.mask_set, cmd.min_size) # -------------------------------------------------------------------------- def all7 () : read ("data/duo-diskn-foto.txt") # !? read ("data/duo-diskn-video.txt") # !? # NO read ("data/wd-backup.txt") # !? 
read ("data/wd-foto-canon-powershot-a70.txt") read ("data/wd-foto-eos-foto-2008.txt") read ("data/wd-foto-fotoarchiv.txt") read ("data/wd-foto-raw.txt") read ("data/wd-foto-selection-archive.txt") read ("data/wd-video.txt") read ("data2/wd-foto-2009-02-01.txt") read ("data3/amd-diskf-2009-04-25.txt") read ("data3/amd-diskg-2009-04-25.txt") read ("data3/pentium4-diskf-2009-04-25.txt") read ("data3/pentium4-diskg-2009-04-25.txt") # NO read ("data3/wd-backup-2009-04-25.txt") # !? read ("data3/wd-foto-2009-04-25.txt") read ("data3/wd-video-2009-04-25.txt") read ("data3/wd-prev-2009-04-25.txt") # !? read ("data4/pentium4-diskf-2009-06-27.txt") read ("data4/pentium4-diskg-2009-06-27.txt") read ("data4/wd-foto-2009-06-27.txt") read ("data4/wd-video-2009-06-27.txt") read ("data4/duo-diskn-dalsi-2009-06-28.txt") # !? read ("data4/duo-diskn-fotoarchiv-2009-06-28.txt") # !? read ("data5/blue-fotky-2009-08-23.txt") read ("data5/duo-store-foto-2009-08-22.txt") # !? read ("data5/duo-diskn-video-2009-08-22.txt") # !? read ("data5/duo-store-transfer-disk_n-fotoarchiv-2009-08-22.txt") # !? read ("data6/duo-diskn-foto-2009-10-04.txt") # !? read ("data6/duo-diskn-foto-orig-2009-10-04.txt") # !? read ("data7/duo-diskn-foto-2009-10-18.txt") # !? read ("data7/duo-diskn-fotoarchiv-2009-10-18.txt") # !? read ("data7/duo-diskn-video-2009-10-18.txt") # !? read ("data7/duo-store-fotografie-diskf-2009-10-17.txt") # !? read ("data7/duo-store-fotografie-diskg-2009-10-17.txt") # !? read ("data7/red-diskf-2009-10-17.txt") read ("data7/red-diskg-2009-10-17.txt") def all8 () : all7 () read ("data8/duo-diskn-fotoarchiv-2009-11-07.txt") read ("data8/duo-diskn-video-2009-11-07.txt") # !? # NO read ("data8/wd-backup-2009-11-21.txt") # !? 
read ("data8/wd-foto-2009-11-08.txt") read ("data8/wd-fotoarchiv-2009-11-08.txt") read ("data8/wd-video-2009-11-08.txt") def all9 () : all8 () read ("data9/amd-diskf-2010-06-26.txt") read ("data9/amd-diskg-2010-06-26.txt") # NO read ("data9/wd-backup-2010-06-27.txt") # !? read ("data9/wd-doplnky-2010-06-27.txt") read ("data9/wd-foto-2010-06-27.txt") read ("data9/wd-fotoarchiv-2010-06-27.txt") read ("data9/wd-grafika-2010-06-27.txt") read ("data9/wd-video-2010-06-27.txt") read ("data9/pentium4-diskf-2010-06-18.txt") read ("data9/pentium4-diskg-2010-06-18.txt") def all10 () : all9 () read ("data10/branik-store1-fotoarchiv-2010-10-28.txt") read ("data10/branik-store1-video-2010-10-28.txt") read ("data10/branik-store2-foto-2010-10-28.txt") read ("data10/branik-store2-video-2010-10-28.txt") read ("data10/white-diskf-2010-11-14.txt") read ("data10/white-diskg-2010-11-14.txt") def all11 () : all10 () read ("data11/wd-check-2011-04-17.txt") read ("data11/wd-doplnky-2011-04-17.txt") read ("data11/wd-foto-2011-04-17.txt") read ("data11/wd-fotoarchiv-2011-04-17.txt") read ("data11/wd-grafika-2011-04-17.txt") read ("data11/wd-new-2011-04-17.txt") read ("data11/wd-video-2011-04-17.txt") def all12 () : all11 () read ("data12/pentium4-diskf-2012-01-20.txt") read ("data12/pentium4-diskg-2012-01-20.txt") read ("data12/red-diskf-2012-01-22.txt") read ("data12/red-diskg-2012-01-22.txt") read ("data12/white-diskf-2012-01-20.txt") read ("data12/white-diskf2011-2012-01-20.txt") read ("data12/white-diskg-2012-01-20.txt") def all14 () : all12 () read ("data14/silver-foto-2012-05-30.txt") read ("data14/silver-foto2-2012-05-30.txt") read ("data14/silver-fotoarchiv-2012-05-30.txt") read ("data14/silver-video-2012-05-30.txt") read ("data14/white-diskf2011-2012-06-02.txt") read ("data14/white-diskg-2012-06-02.txt") def all15 () : all14 () read ("data15/blue-diske-2012-10-21.txt") read ("data15/blue-diskf-2012-10-21.txt") read ("data15/blue-diskg-2012-10-21.txt") read 
("data15/usb3-foto-2012-12-23.txt") def all16 () : all15 () read ("data16/red-diske-2015-06-18.txt") read ("data16/red-diskf-2015-06-18.txt") read ("data16/red-diskg-2015-06-18.txt") def all () : all16 () # -------------------------------------------------------------------------- def compare_all_8 () : all7 () switch () read ("data8/wd-foto-2009-11-08.txt") read ("data8/wd-fotoarchiv-2009-11-08.txt") read ("data8/wd-video-2009-11-08.txt") exclude ( [ ".info", ".thm", ".scn", ".db", ".xls", ".exe", ".doc", ".pdf", ".zip" ] ) compare () def compare_all_9 () : all8 () switch () read ("data9/wd-doplnky-2010-06-27.txt") read ("data9/wd-foto-2010-06-27.txt") read ("data9/wd-fotoarchiv-2010-06-27.txt") read ("data9/wd-grafika-2010-06-27.txt") read ("data9/wd-video-2010-06-27.txt") exclude ( [ ".info", ".thm", ".scn", ".db", ".xls", ".exe", ".doc", ".pdf", ".zip" ] ) compare () def compare_all_11 () : all10 () switch () read ("data11/wd-check-2011-04-17.txt") read ("data11/wd-doplnky-2011-04-17.txt") read ("data11/wd-foto-2011-04-17.txt") read ("data11/wd-fotoarchiv-2011-04-17.txt") read ("data11/wd-grafika-2011-04-17.txt") read ("data11/wd-new-2011-04-17.txt") read ("data11/wd-video-2011-04-17.txt") read ("data14/white-diskf2011-2012-06-02.txt") read ("data14/white-diskg-2012-06-02.txt") exclude ( [ ".info", ".thm", ".scn", ".db", ".xls", ".exe", ".doc", ".pdf", ".zip" ] ) compare () def compare_all_15 () : all14 () switch () read ("data15/blue-diske-2012-10-21.txt") read ("data15/blue-diskf-2012-10-21.txt") read ("data15/blue-diskg-2012-10-21.txt") # read ("data15/usb3-foto-2012-12-23.txt") exclude ( [ ".info", ".thm", ".scn", ".db", ".xls", ".exe", ".doc", ".pdf", ".zip" ] ) compare () def compare_all_16 () : all15 () switch () read ("data16/red-diske-2015-06-18.txt") read ("data16/red-diskf-2015-06-18.txt") read ("data16/red-diskg-2015-06-18.txt") exclude ( [ ".info", ".thm", ".scn", ".db", ".xls", ".exe", ".doc", ".pdf", ".zip" ] ) compare () # 
# --------------------------------------------------------------------------
# Named data sets: each function reads its scan files into the current group.


def wd3():
    read("data3/wd-foto-2009-04-25.txt")
    read("data3/wd-video-2009-04-25.txt")


def wd4():
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")


def wd8():
    read("data8/wd-foto-2009-11-08.txt")
    read("data8/wd-fotoarchiv-2009-11-08.txt")
    read("data8/wd-video-2009-11-08.txt")


def wd9():
    read("data9/wd-foto-2010-06-27.txt")
    read("data9/wd-fotoarchiv-2010-06-27.txt")
    read("data9/wd-video-2010-06-27.txt")
    read("data9/wd-doplnky-2010-06-27.txt")


def wd11():
    read("data11/wd-check-2011-04-17.txt")
    read("data11/wd-doplnky-2011-04-17.txt")
    read("data11/wd-foto-2011-04-17.txt")
    read("data11/wd-fotoarchiv-2011-04-17.txt")
    read("data11/wd-grafika-2011-04-17.txt")
    read("data11/wd-new-2011-04-17.txt")
    read("data11/wd-video-2011-04-17.txt")


def blue5():
    read("data5/blue-fotky-2009-08-23.txt")


def blue():
    read("data15/blue-diske-2012-10-21.txt")
    read("data15/blue-diskf-2012-10-21.txt")
    read("data15/blue-diskg-2012-10-21.txt")


def red7():
    read("data7/red-diskf-2009-10-17.txt")
    read("data7/red-diskg-2009-10-17.txt")


def red12():
    read("data12/red-diskf-2012-01-22.txt")
    read("data12/red-diskg-2012-01-22.txt")


def red():
    read("data16/red-diske-2015-06-18.txt")
    read("data16/red-diskf-2015-06-18.txt")
    read("data16/red-diskg-2015-06-18.txt")


def white():
    read("data14/white-diskf2011-2012-06-02.txt")
    read("data14/white-diskg-2012-06-02.txt")


def amd():
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")


def amd9():
    read("data9/amd-diskf-2010-06-26.txt")
    read("data9/amd-diskg-2010-06-26.txt")


def pentium():
    read("data12/pentium4-diskf-2012-01-20.txt")
    read("data12/pentium4-diskg-2012-01-20.txt")


def duo():
    read("data14/duo-diskn-fotoarchiv.txt")
    read("data14/duo-diskn-video.txt")
    read("data14/duo-store-canon.txt")
    read("data14/duo-store-newf.txt")
    read("data14/duo-store-newg.txt")
    read("data14/duo-store-newg2.txt")


def branik():
    read("data10/branik-store1-fotoarchiv-2010-10-28.txt")
    read("data10/branik-store1-video-2010-10-28.txt")
    read("data10/branik-store2-foto-2010-10-28.txt")
    read("data10/branik-store2-video-2010-10-28.txt")


def silver():
    read("data14/silver-foto-2012-05-30.txt")
    read("data14/silver-foto2-2012-05-30.txt")
    read("data14/silver-fotoarchiv-2012-05-30.txt")
    read("data14/silver-video-2012-05-30.txt")


def ku():
    read("data-coreduo24/local-additional-2011-06-04.txt")
    read("data-coreduo24/local-extra-foto-2011-06-04.txt")
    read("data-coreduo24/local-extra-fotoarchiv-2011-06-04.txt")
    read("data-coreduo24/local-extra-grafika-2011-06-04.txt")
    read("data-coreduo24/local-extra-new-2011-06-04.txt")
    read("data-coreduo24/local-store-doplnky-2011-06-04.txt")
    read("data-coreduo24/local-store-fotoarchiv-2011-06-04.txt")
    read("data-coreduo24/local-video-2011-06-04.txt")

# (A second, identical definition of blue5 () used to follow here; it only
# rebound the same name, so the single definition above is kept.)

# base set: wd11, white, blue
# nothing new: ku, branik, amd, amd9, pentium, usb3
# some files:
# white (3 files, 10 MB)
# red (1 file, 4 MB)
# blue5 (24 files, 32 MB)
# silver (27 files, 41 MB)
# duo (197 files, 953 MB)

# --------------------------------------------------------------------------


def wd8f():
    read("data8/wd-foto-2009-11-08.txt")


def wd8a():
    read("data8/wd-fotoarchiv-2009-11-08.txt")


def wd8v():
    read("data8/wd-video-2009-11-08.txt")


def wd8b():
    read("data8/wd-backup-2009-11-21.txt")

# --------------------------------------------------------------------------


def wd9f():
    read("data9/wd-foto-2010-06-27.txt")


def wd9a():
    read("data9/wd-fotoarchiv-2010-06-27.txt")


def wd9v():
    read("data9/wd-video-2010-06-27.txt")


def wd9d():
    read("data9/wd-doplnky-2010-06-27.txt")


def wd9b():
    read("data9/wd-backup-2010-06-27.txt")

# --------------------------------------------------------------------------


def wd11f():
    read("data11/wd-foto-2011-04-17.txt")


def wd11a():
    read("data11/wd-fotoarchiv-2011-04-17.txt")


def wd11v():
    read("data11/wd-video-2011-04-17.txt")


def wd11d():
    read("data11/wd-doplnky-2011-04-17.txt")


def wd11n():
    read("data11/wd-new-2011-04-17.txt")

# --------------------------------------------------------------------------


def p12f():
    read("data12/pentium4-diskf-2012-01-20.txt")


def p12g():
    read("data12/pentium4-diskg-2012-01-20.txt")

# --------------------------------------------------------------------------


def r12f():
    read("data12/red-diskf-2012-01-22.txt")


def r12g():
    read("data12/red-diskg-2012-01-22.txt")

# --------------------------------------------------------------------------


def w14f():
    read("data14/white-diskf2011-2012-06-02.txt")


def w14g():
    read("data14/white-diskg-2012-06-02.txt")

# --------------------------------------------------------------------------


def b15f():
    read("data15/blue-diskf-2012-10-21.txt")


def b15g():
    read("data15/blue-diskg-2012-10-21.txt")

# --------------------------------------------------------------------------


def r16f():
    read("data16/red-diskf-2015-06-18.txt")


def r16g():
    read("data16/red-diskg-2015-06-18.txt")

# --------------------------------------------------------------------------


def s1a():
    read("data10/branik-store1-fotoarchiv-2010-10-28.txt")


def s1v():
    read("data10/branik-store1-video-2010-10-28.txt")


def s2f():
    read("data10/branik-store2-foto-2010-10-28.txt")


def s2v():
    read("data10/branik-store2-video-2010-10-28.txt")

# --------------------------------------------------------------------------
# Ready-made comparisons: first group, switch (), second group, compare ().


def compare_wd3():
    read("data3/pentium4-diskf-2009-04-25.txt")
    read("data3/pentium4-diskg-2009-04-25.txt")
    switch()
    read("data3/wd-foto-2009-04-25.txt")
    read("data3/wd-video-2009-04-25.txt")
    exclude([".info"])
    compare()


def compare_wd4():
    read("data4/pentium4-diskf-2009-06-27.txt")
    read("data4/pentium4-diskg-2009-06-27.txt")
    switch()
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")
    exclude([".info"])
    compare()


def compare_amd():
    read("data3/amd-diskf-2009-04-25.txt")
    read("data3/amd-diskg-2009-04-25.txt")
    switch()
    read("data3/pentium4-diskf-2009-04-25.txt")
    read("data3/pentium4-diskg-2009-04-25.txt")
    exclude([".info"])
    compare()


def compare_backup():
    read("data3/wd-backup-2009-04-25.txt")
    read("data3/wd-prev-2009-04-25.txt")
    switch()
    read("data4/wd-foto-2009-06-27.txt")
    read("data4/wd-video-2009-06-27.txt")
    exclude([".info"])
    compare()

# --------------------------------------------------------------------------


def compare_blue():
    read("data4/duo-diskn-fotoarchiv-2009-06-28.txt")
    read("data5/duo-diskn-video-2009-08-22.txt")
    read("data5/duo-store-foto-2009-08-22.txt")
    switch()
    read("data5/blue-fotky-2009-08-23.txt")
    compare()

# mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}\\." )
# mask ( "/[a-zA-Z0-9]{3}_[0-9]{4}" )
# mask ( "/[iI][mM][gG]_[0-9]{4}" )

# --------------------------------------------------------------------------


def collections():
    """Read every ``.lslr`` / ``.lslR`` file found in ./data-collections,
    in sorted order, into the current group."""
    dir_name = "./data-collections"
    for local_name in sorted(os.listdir(dir_name)):
        if local_name.endswith(".lslr") or local_name.endswith(".lslR"):
            file_name = os.path.join(dir_name, local_name)
            if os.path.isfile(file_name):
                read_lslR(file_name)


def news():
    read_lslR("./data-local/news.lslR")


def download():
    read_lslR("./data-local/download.lslR")
    # read_lslR ("./data-local/download0.lslR")

# --------------------------------------------------------------------------


def compare_by_e(x, y):
    """cmp-style ordering by ``e_model``, then ``e_number``, then ``e_time``.

    Returns -1, 0 or 1.
    """
    left = (x.e_model, x.e_number, x.e_time)
    right = (y.e_model, y.e_number, y.e_time)
    # tuple comparison walks the fields in order, like the chained cmp calls
    return (left > right) - (left < right)


def check_stamps(a, key, n1, n2):
    """Count the records of *a* whose ``e_model`` contains *key*.

    Among those, every record whose name contains *n1* or *n2* is reported
    as inconsistent.  Finally *key* and the count are printed.
    """
    cnt = 0
    for f in a:
        if key in f.e_model:
            cnt += 1
            if n1 in f.name or n2 in f.name:
                # One pre-joined string gives the same text as the former
                # multi-item print statement (message spelling kept as is).
                print(" ".join(("Inconsitent name: ", str(f.e_model),
                                str(f.e_number), str(f.name))))
    print(" ".join((str(key), str(cnt))))


def read_stamps () :
    a = []
    #
readFile (a, "data7/exif-duo-store-diskf-2009-10-24.txt") # readFile (a, "data7/exif-duo-store-diskg-fotoarchiv-2009-10-24.txt") readFile (a, "data8/wd-foto-2009-11-08.txt") readFile (a, "data8/wd-fotoarchiv-2009-11-08.txt") cache = { } for f in a : if f.md5 in cache : g = cache [f.md5] eq = f.e_model == g.e_model eq = eq and f.e_number == g.e_number eq = eq and f.e_width == g.e_width eq = eq and f.e_height == g.e_height eq = eq and f.e_time == g.e_time if not eq : print "Inconsitent info: ", f.name, f.e_model, f.e_number, f.e_width, f.e_height, f.e_time else : if valid_md5 (f.md5) : cache [f.md5] = f if 0 : queue = [ ] for f in a : queue.append (f) queue.sort (compare_by_e) for f in queue : print f.e_number, f.e_width, f.e_height, f.e_model, f.e_time, f.name if 0 : check_stamps (a, "A70", "A200", "EOS") check_stamps (a, "A2000", "EOS", "A70") check_stamps (a, "EOS", "A2000", "A70") if 0 : mask = "/[a-zA-Z0-9]{3}_[0-9]{4}\\." for f in a : if f.e_model != "" : if re.search (mask, f.name) : print "Short name", f.e_model, f.e_number, f.name # -------------------------------------------------------------------------- def load (n) : fmt = "" if n.startswith("proc:") : fmt = "proc" n = n [5:] elif n.startswith("file:") : fmt = "proc" n = n [5:] elif n.startswith ("lslr:") : fmt = "lslr" n = n [5:] elif n.startswith ("dir:") : fmt = "dir" n = n [4:] elif n.endswith (".lslr") or n.endswith (".lslR") : fmt = "lslr" elif re.match ("^[a-zA-Z-][a-zA-Z0-9_]*$", n) : fmt = "proc" else : fmt = "file" if fmt == "proc" : eval (n + "()") elif fmt == "file" : read (n) elif fmt == "lslr" : read_lslR (n) elif fmt == "dir" : read_dir (n) else : print "Unknown format", fmt, n exit () # -------------------------------------------------------------------------- def command_line_options (): if opt.exclude : exclude ( [ ".info", ".db", ".thm", ".scn" ] ) if opt.large : minimal_size ( 1000 * 1000 ) if opt.foto : only ( [ ".jpg", ".jpeg", ".cr2" ] ) if opt.video : only ( [ ".avi", ".mpg", 
".mpeg" ] ) if opt.jpeg : only ( [ ".jpg", ".jpeg" ] ) if opt.cr2 : only ( [ ".cr2" ] ) if opt.avi : only ( [ ".avi" ] ) if opt.mpeg : only ( [ ".mpg", ".mpeg" ] ) # -------------------------------------------------------------------------- def main () : read_options () # read_stamps () command_line_options () if opt.run == "" : for n in opt.first : load (n) switch () for n in opt.second : load (n) compare () if opt.run != "" : eval (opt.run + "()") if __name__ == '__main__' : main () # --------------------------------------------------------------------------