At some point at work I had to clean up old stuff so that we could use it as a base for the next project. I needed a script that would search for the *.sprite filenames in a given .game file, which was just a simple file with references to those sprite filenames, and list the sprites that were not referenced anywhere.
If I remember correctly, it ran fast enough (about 100 ms for all of our sprite folders). TODO (note to self): explain the algorithm!
import sys
import re
import os

# NOTE: every print() below is called with ONE pre-formatted string. That form
# produces the same output whether print is a statement (Python 2) or a
# function (Python 3), so the script runs unchanged under both.

# Whole-word tokenizer applied to every level file; compiled once.
# No re.IGNORECASE here: the flag has no effect on \w, and the lookup
# done on the resulting set is case-sensitive.
_WORD_RE = re.compile(r"\b\w+\b")


class UsedSpritesLister:
    """
    Finds the sprites that are not referenced by any game level file.

    Sprite names are the base names (extension stripped) of the '*.sprite'
    files found below input_sprite_path. A sprite counts as "used" when its
    name appears as a whole word in at least one file below
    input_gamelevels_path whose extension is in fileExtSet.

    Matching notes:
    - the comparison is CASE-SENSITIVE;
    - words are runs of \\w characters, so a sprite whose name contains any
      other character (e.g. '-' or '.') can never match and is always
      reported as not used.
    """

    def __init__(self):
        # members that need to be set for the processing to work
        self.input_sprite_path = ""
        self.input_gamelevels_path = ""

        # directory names that are never descended into
        self.ignoreDirSet = set(['.svn'])
        # extensions (with the leading dot) of the files that are searched
        self.fileExtSet = set(['.level'])

        # results of the last call to process()
        self.spriteNameSet = set()
        self.foundPatternsSet = set()
        self.notUsedSpritesNameSet = set()

        # caches the sets of whole words of the texts read from disk,
        # keyed by file path, so each file is read and tokenized only once
        self.textSetCache = dict()

    def process(self):
        """
        Computes which sprites are not used by the game levels.

        Fills spriteNameSet, foundPatternsSet and notUsedSpritesNameSet.
        The result sets are reset first, so a run that finds no sprites does
        not leave the results of a previous run behind.
        """
        self.foundPatternsSet = set()
        self.notUsedSpritesNameSet = set()

        print('Processing sprite names...')
        self.spriteNameSet = self.getSpriteNamesSet(self.input_sprite_path)
        if not self.spriteNameSet:
            print('No sprites found, stopping.')
            return

        print('Checking which sprite is not used...')
        # Walk the level tree ONCE and intersect, instead of re-walking the
        # whole tree for every single sprite name.
        usedWords = self._collectWords(self.input_gamelevels_path)
        self.foundPatternsSet = self.spriteNameSet & usedWords
        self.notUsedSpritesNameSet = self.spriteNameSet - self.foundPatternsSet

    def printResults(self):
        """
        Prints the processing results, sorted by name so that the output is
        stable between runs. If nothing was processed, only the header is
        printed.
        """
        print('Not used sprites: ')
        for spriteName in sorted(self.notUsedSpritesNameSet):
            print('---  %s' % spriteName)

    def getSpriteNamesSet(self, dir):
        """
        Gets the names of the sprites in a given dir.
        Note that this function assumes that all names are UNIQUE, since a
        set is being used for storing the names

        Params:
            - dir: directory where to start looking for sprites
        Returns:
            set with the names (file name without extension) of the
            '*.sprite' files found; directories in ignoreDirSet are skipped
        """
        spritesSet = set()
        for (dirpath, dirnames, filenames) in os.walk(dir):
            self._pruneIgnoredDirs(dirnames)
            for filename in filenames:
                if filename.endswith('.sprite'):
                    spritesSet.add(os.path.splitext(filename)[0])
        return spritesSet

    def searchInDir(self, dir, pattern):
        """
        Walks the given directory recursively searching for the given
        pattern in the files with an extension listed in fileExtSet.

        Params:
            - dir: directory to search recursively
            - pattern: whole word to search in the files of the directory
        Returns:
            True if pattern is found, False otherwise. The search stops at
            the first file that contains the pattern.
        """
        return any(self.searchInText(path, pattern)
                   for path in self._iterSearchableFiles(dir))

    def searchInText(self, file, pattern):
        """
        Searches a pattern in the given file.

        Params:
            - file: path of the file to search
            - pattern: whole word to look for (case-sensitive)
        Returns:
            True if found, False otherwise
        """
        return pattern in self._wordsInFile(file)

    def _pruneIgnoredDirs(self, dirnames):
        """
        Removes the ignored directories from an os.walk 'dirnames' list.

        The list is rebuilt through slice assignment: os.walk only honours
        in-place changes, and calling remove() while iterating the same list
        skips the entry that follows each removed one.
        """
        dirnames[:] = [name for name in dirnames
                       if name not in self.ignoreDirSet]

    def _iterSearchableFiles(self, dir):
        """
        Yields the path of every file below dir whose extension is in
        fileExtSet, without descending into ignored directories.
        """
        for (dirpath, dirnames, filenames) in os.walk(dir):
            # must happen before the walk resumes with the next directory
            self._pruneIgnoredDirs(dirnames)
            for filename in filenames:
                if os.path.splitext(filename)[1] in self.fileExtSet:
                    yield os.path.join(dirpath, filename)

    def _wordsInFile(self, file):
        """
        Returns the set of whole words of the given file, reading and
        tokenizing it only the first time it is requested.
        """
        try:
            return self.textSetCache[file]
        except KeyError:
            pass

        # 'with' guarantees the handle is closed even if read() fails
        with open(file, 'r') as fileHandler:
            gamefile_text = fileHandler.read()

        gamefile_set = set(_WORD_RE.findall(gamefile_text))
        self.textSetCache[file] = gamefile_set
        return gamefile_set

    def _collectWords(self, dir):
        """
        Returns the union of the whole words of all searchable files
        below dir.
        """
        words = set()
        for path in self._iterSearchableFiles(dir):
            words |= self._wordsInFile(path)
        return words


def printUsage():
    """
    Prints the command line help, using the name the script was started
    with.
    """
    prog = os.path.basename(sys.argv[0]) if sys.argv else ''
    print('')
    print('usage: %s input_path game_levels_path' % prog)
    print('')
    print('Lists the sprites from a given folder that are not used in the game levels in the given folder')
    print(' input_path path to the input folder without quotation marks')
    print(' game_levels_path path to the game levels folder without quotation marks')


def main(argv=None):
    """
    Command line entry point.

    Params:
        - argv: full argument list including the program name;
                defaults to sys.argv
    Returns:
        process exit code: 0 on success, 2 on invalid arguments
    """
    if argv is None:
        argv = sys.argv

    args = argv[1:]
    if len(args) != 2:
        print('Invalid number of arguments')
        printUsage()
        return 2

    input_sprite_path, input_gamelevels_path = args

    # isdir rather than exists: a regular file is not a usable folder
    if not os.path.isdir(input_sprite_path):
        print('ERROR: input folder < %s > does not exist' % input_sprite_path)
        printUsage()
        return 2

    if not os.path.isdir(input_gamelevels_path):
        print('ERROR: game folder < %s > does not exist' % input_gamelevels_path)
        printUsage()
        return 2

    spriteLister = UsedSpritesLister()
    spriteLister.input_sprite_path = input_sprite_path
    spriteLister.input_gamelevels_path = input_gamelevels_path

    print('Input dir:  %s' % input_sprite_path)
    print('Game levels dir:  %s' % input_gamelevels_path)
    print('Processing...')

    spriteLister.process()
    spriteLister.printResults()

    print('Finished')
    return 0


if __name__ == '__main__':
    sys.exit(main())