-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfoldercompare.py
More file actions
136 lines (122 loc) · 4.66 KB
/
foldercompare.py
File metadata and controls
136 lines (122 loc) · 4.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import sys
import os
import hashlib
import ast
import logging
# Snapshot file name, prefixed with the path separator so that it can be
# appended directly to a folder path (``folder + SNAPSHOT``).
SNAPSHOT = "".join([os.sep, "files.snapshot"])
# Name of the file the command-line entry point writes the change list to.
CHANGELOG = "change_list.log"
class FolderNotExistsException(Exception):
    """Raised when a folder that should be scanned cannot be found.

    The object handed to the constructor is stored on ``value``; converting
    the exception to a string yields the ``repr`` of that object.
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        text = repr(self.value)
        return text
def _calc_hash(path, filename):
    """Return the SHA-1 hex digest of the content of ``filename`` in ``path``.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so a large file is never held in memory as a whole.

    Args:
        path: directory that contains the file.
        filename: name of the file inside ``path``.

    Returns:
        The hexadecimal SHA-1 digest of the file content, as a string.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # TODO: enhance the performance by hashing stat metadata
    # (e.g. mtime + size) instead of the complete file content.
    full = os.path.join(path, filename)
    digest = hashlib.sha1()
    with open(full, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def _build_snapshot(folder):
    r"""Hash every file below ``folder`` and store the result as a snapshot.

    Files are identified by their path relative to ``folder`` so that the
    same file can be matched between two different base folders, e.g.::

        c:\the_new_version_folder\src\core\logic\shoppingcart\calc.py
        c:\the_old_version_folder\src\core\logic\shoppingcart\calc.py
        |<------- base -------->| |<------- relative path -------->|

    After the walk, the mapping is written (as its ``str()`` form) to the
    snapshot file ``folder + SNAPSHOT``.

    NOTE: keys are produced with ``os.path.relpath``. A snapshot file written
    with keys in a different format is not comparable and has to be rebuilt.

    Args:
        folder: base folder to walk recursively.

    Returns:
        dict mapping ``relative path -> SHA-1 hex digest``.
    """
    logger.debug("folder: %s , build snapshot", folder)
    # SNAPSHOT carries a leading separator; strip it to get the key the
    # snapshot file itself would have.
    snapshot_name = SNAPSHOT.lstrip(os.sep)
    file_hash = {}
    for base, _dirs, files in os.walk(folder):
        for filename in files:
            # relpath keeps the separator between directory and file name, so
            # "a/bc" and "ab/c" can no longer collapse into the same key.
            relative_path = os.path.relpath(os.path.join(base, filename), folder)
            # A snapshot left behind by an earlier run is bookkeeping, not
            # content of the folder; do not report it as a changed file.
            if relative_path == snapshot_name:
                continue
            file_hash[relative_path] = _calc_hash(base, filename)
    # build files.snapshot after scan
    with open(folder + SNAPSHOT, 'w') as f:
        f.write(str(file_hash))
    return file_hash
def _scan_folder(folder, snapshot=True):
    """Return the ``{relative path: hash}`` mapping for ``folder``.

    Args:
        folder: base folder to scan.
        snapshot: when true and the snapshot file ``folder + SNAPSHOT``
            exists, the mapping is read from that file instead of hashing
            the folder again. Otherwise the folder is scanned and a new
            snapshot is built.

    Returns:
        dict mapping relative file path to its hash.

    Raises:
        FolderNotExistsException: if ``folder`` is not an existing directory.
    """
    # A regular file is not a scannable folder either, hence isdir().
    if not os.path.isdir(folder):
        raise FolderNotExistsException("folder: %s not found" % folder)
    logger.info("scan folder: %s", folder)
    # lookup snapshot before scan
    if snapshot:
        snapshot_path = folder + SNAPSHOT
        if os.path.exists(snapshot_path):
            # "with" closes the file even when parsing the content fails.
            with open(snapshot_path, 'r') as fmeta:
                # literal_eval only evaluates Python literals; it does not
                # execute arbitrary code found in the snapshot file.
                file_version = ast.literal_eval(fmeta.read())
            logger.debug("folder: %s , read Snapshot", folder)
            return file_version
        logger.debug('Snapshot not found.')
    return _build_snapshot(folder)
class DiffScanner(object):
    """Compare two versions of a folder and look up the differences.

    Both folders are scanned on construction; ``scan()`` then compares the
    two ``{relative path: hash}`` mappings.
    """

    def __init__(self, new_version, old_version, snapshot=True):
        """Scan both folders.

        Args:
            new_version: path of the folder holding the new version.
            old_version: path of the folder holding the old version.
            snapshot: forwarded to ``_scan_folder`` for both folders.

        Raises:
            FolderNotExistsException: propagated unchanged from
                ``_scan_folder`` when one of the folders is missing.
        """
        # The module-level logging configuration already attaches the file
        # handler to this logger, so it is not added again per instance.
        self.logger = logging.getLogger(__name__)
        self.logger.info("Compare: %s, %s", new_version, old_version)
        self.new_version = _scan_folder(new_version, snapshot)
        self.old_version = _scan_folder(old_version, snapshot)

    def scan(self):
        """Compare the hashes of the files in the two folders.

        Returns:
            dict mapping ``relative path -> status`` where status is one of:
                "U" = file exists in both versions with a different hash
                "+" = file exists only in the new version
                "-" = file exists only in the old version
            Files with identical hashes are not part of the result.
        """
        diff = {}
        for key, new_file_hash in sorted(self.new_version.items()):
            if key not in self.old_version:
                # New file
                diff[key] = "+"
                self.logger.debug("%s %s \n", '[+]', key)
            elif new_file_hash != self.old_version[key]:
                # Updated file
                diff[key] = "U"
                self.logger.debug("%s %s \n", '[U]', key)
        for key in sorted(self.old_version):
            # Removed file
            if key not in self.new_version:
                diff[key] = "-"
                self.logger.debug("%s %s \n", '[-]', key)
        self.logger.info("Compare completed")
        return diff
# logging configuration
# Root logger reports INFO and above (basicConfig does nothing when the root
# logger already has handlers).
logging.basicConfig(level=logging.INFO)

# Records of this module's logger are additionally written to info.log,
# using the layout below.
format = '%(asctime)s %(levelname)s %(module)s.%(funcName)s():%(lineno)s %(message)s'
formatter = logging.Formatter(format)
hdlr = logging.FileHandler('info.log')
hdlr.setFormatter(formatter)

logger = logging.getLogger(__name__)
logger.addHandler(hdlr)
if __name__ == "__main__":
    # Command-line entry point: compare two folders and write every detected
    # change as "[status] relative_path" to the CHANGELOG file.
    if len(sys.argv) != 3:
        # Parenthesised single-argument print works as a statement on
        # Python 2 and as a function call on Python 3.
        print("=========================================")
        print("= Usage: $ python foldercompare.py ${new_version_path} ${old_version_path}")
        print("=========================================")
    else:
        new_path = sys.argv[1]
        old_path = sys.argv[2]
        scanner = DiffScanner(new_path, old_path, snapshot=False)
        changelist = scanner.scan()
        # "with" closes the change log even if a write fails; sorting makes
        # the output order independent of dict ordering.
        with open(CHANGELOG, 'w') as f:
            for filename, status in sorted(changelist.items()):
                f.write("[%s] %s \n" % (status, filename))