Report abuse

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# This program will take backups of only your changed files from the filesystem into
# an external storage device like a USB Hard Drive. 
#
# Usage : python backupsets.py folder1 folder2......
#
# The algorithm is as follows
# 1) Look for .set in each folder to back up and load the set if it exists; if not, initialize to an empty set
# 2) Recurse through the folders to back up and create the latest version of .set
# 3) Compare the old and the new .set to determine which files/folders need to be backed up
# 4) Back up
#
# This program will not work if the top dir or one of its sub dirs/files is a soft link

import os
import pickle
import sys
import shutil

# Root directory on the backup drive. getbackuppath() joins source paths onto
# it, and the script exits early if it is not an existing directory.
BACKUP_DRIVE_PATH ="E:\\Guru\\backup"

def dumpfileset(top_dir):
	"""
	Build the current file set for top_dir and persist it as top_dir/.set.

	The set is produced by buildfileset(top_dir) and pickled into the ".set"
	file inside top_dir, overwriting any previous ".set" file.

	Args:
		top_dir: directory whose contents are being tracked.

	Returns:
		The freshly built set that was written to disk.
	"""
	fset = buildfileset(top_dir)
	set_file_path = os.path.join(top_dir,".set")
	print("Saving set file in "+set_file_path)
	# Use a context manager so the pickle is flushed and the handle closed
	# deterministically instead of whenever the file object is collected.
	with open(set_file_path,"wb") as set_file:
		pickle.dump(fset,set_file)
	return fset

def readfileset(top_dir):
	"""
	Load the persisted ".set" file from top_dir.

	Args:
		top_dir: directory that may contain a ".set" file.

	Returns:
		The unpickled object stored in top_dir/.set, or an empty set when the
		file is missing, unreadable, empty or truncated. Returning an empty
		set means every entry of a newly built set counts as changed.
	"""
	set_file_path = os.path.join(top_dir,".set")
	# NOTE(security): pickle.load can execute arbitrary code. The .set file
	# lives inside the directory being backed up, so this is only safe when
	# that directory is writable by trusted users only.
	try:
		# The context manager closes the handle even if unpickling fails.
		with open(set_file_path,"rb") as set_file:
			fset = pickle.load(set_file)
	except (IOError, EOFError, pickle.UnpicklingError):
		# Missing file (first run) or a damaged one: start from scratch.
		fset = set()
	return fset

def buildfileset(top_dir):
	"""
	Walk top_dir and return a set describing every file and directory under it.

	Each element is the string full_path + "~*~" + str(last_modified_time),
	so an entry changes whenever the path's modification time changes.

	Using a set is inefficient but keeps the program simple: a plain
	"new - old" difference yields everything that changed since the last
	backup. The cost is that every entry is compared individually. With a
	tree, a newly created directory holding 1000 files could be recognised as
	new in one step; with a set all 1000 entries are checked.

	Args:
		top_dir: directory to scan recursively (top_dir itself is not added).

	Returns:
		A set of "path~*~mtime" strings. Entries whose metadata cannot be read
		are reported and left out.
	"""
	fset = set()
	for root, dirs, files in os.walk(top_dir):
		for name in dirs + files:
			full_path = os.path.join(root, name)
			try:
				mtime = os.stat(full_path).st_mtime
			except OSError:
				# Dangling symlink, or the entry vanished while walking.
				# Skip it rather than aborting the whole backup run.
				print("Skipping unreadable path "+full_path)
				continue
			fset.add(full_path + "~*~" + str(mtime))
	return fset

def getbackuppath(file):
	"""
	Return the path under BACKUP_DRIVE_PATH where the given path is backed up.

	The drive (or UNC share) prefix and any leading path separators are
	removed, and the remainder is joined onto BACKUP_DRIVE_PATH. For example
	"C:\\Users\\me\\a.txt" maps to BACKUP_DRIVE_PATH + "\\Users\\me\\a.txt".

	Args:
		file: source path of a file or directory.

	Returns:
		The destination path inside the backup directory.
	"""
	# splitdrive removes "C:" or a UNC share and is a no-op when there is no
	# drive, so this no longer assumes a fixed three-character "C:/" prefix.
	separators = os.sep + (os.altsep or "")
	# Leading separators must go, otherwise os.path.join would treat the
	# remainder as absolute and discard BACKUP_DRIVE_PATH.
	relative_path = os.path.splitdrive(file)[1].lstrip(separators)
	return os.path.join(BACKUP_DRIVE_PATH,relative_path)

if __name__ == "__main__":
	# Script entry point: for every folder given on the command line, work out
	# what changed since the stored .set snapshot and copy it to the backup.

	# Nothing can be done if the backup drive is not mounted.
	if not os.path.isdir(BACKUP_DRIVE_PATH):
		print("Backup directory is not accessible.Exiting........"+BACKUP_DRIVE_PATH)
		# sys.exit is always available; the bare exit() helper is injected by
		# the site module and may be missing.
		sys.exit(1)

	dirs_to_copy = []
	files_to_copy = []

	# NOTE(review): dumpfileset() persists the new .set before anything is
	# copied, so files missed by an interrupted run are not retried next time.
	for top_dir in sys.argv[1:]:
		old_set = readfileset(top_dir)
		new_set = dumpfileset(top_dir)
		for entry in new_set - old_set:
			# Entries look like "<path>~*~<mtime>". Split on the last full
			# separator so paths that contain "~" themselves stay intact.
			changed_path = entry.rpartition("~*~")[0]
			# Directories are kept apart from files because they have to
			# exist in the backup before files are copied into them.
			if os.path.isdir(changed_path):
				dirs_to_copy.append(changed_path)
			else:
				files_to_copy.append(changed_path)

	# Sorting puts parent directories before their children.
	dirs_to_copy.sort()

	# Create top level dirs in backup drive if they don't exist
	for top_dir in sys.argv[1:]:
		dir_backup_path = getbackuppath(top_dir)
		if not os.path.isdir(dir_backup_path):
			print("Mkdir "+dir_backup_path)
			os.makedirs(dir_backup_path) #recursively create directories

	# Create dirs that have changed if they don't exist. makedirs is used so
	# that a parent missing from the backup does not abort the run.
	for changed_dir in dirs_to_copy:
		dir_backup_path = getbackuppath(changed_dir)
		if not os.path.isdir(dir_backup_path):
			print("Mkdir "+dir_backup_path)
			os.makedirs(dir_backup_path)

	# Copy all changed files to backup place
	for changed_file in files_to_copy:
		file_backup_path = getbackuppath(changed_file)
		# A file can change without its directory appearing in the diff, so
		# make sure the destination directory exists before copying.
		parent_backup_path = os.path.dirname(file_backup_path)
		if not os.path.isdir(parent_backup_path):
			print("Mkdir "+parent_backup_path)
			os.makedirs(parent_backup_path)
		print("Copying "+file_backup_path)
		shutil.copy(changed_file,file_backup_path)