miércoles, 28 de marzo de 2012

Searching file names from a directory in a given file

At work, at some point I had to clean up old stuff so that we could use it as a base for the next project. I needed a script that would search for the *.sprite filenames in a given .game file, which was just a simple file containing references to those sprite filenames.

If I remember correctly, it ran fast enough (about 100 ms for our whole sprite folder). TODO (note to self): explain the algorithm!

import sys
import re
import os  

class UsedSpritesLister:
    """ Finds and lists the sprites that are not referenced by any game level file.

        Usage: set input_sprite_path and input_gamelevels_path, call process()
        and then printResults() (or read notUsedSpritesNameSet directly).

        A sprite counts as "used" when its file name without the '.sprite'
        extension appears as a whole word in at least one file below
        input_gamelevels_path whose extension is in fileExtSet.
        The comparison is CASE-SENSITIVE, and because only runs of word
        characters (letters, digits, underscore) are extracted from the level
        files, a sprite whose name contains any other character (e.g. '-')
        can never match and is always reported as not used.
    """

    def __init__(self):
        # members that need to be set for the processing to work
        self.input_sprite_path = ""
        self.input_gamelevels_path = ""
        # directory names that are never descended into
        self.ignoreDirSet = set(['.svn'])
        # extensions (with leading dot) of the files that are searched
        self.fileExtSet = set(['.level'])

        # results of the last call to process()
        self.spriteNameSet = set()
        self.foundPatternsSet = set()
        self.notUsedSpritesNameSet = set()

        # caches the sets of whole words of the texts read from disk,
        # keyed by file path, so that every file is read and tokenized once
        self.textSetCache = dict()

    def process(self):
        """ Collects the sprite names below input_sprite_path and checks each
            of them against the level files below input_gamelevels_path.

            Fills spriteNameSet, foundPatternsSet and notUsedSpritesNameSet.
            If no sprite is found the result sets are left empty.
        """

        # reset the results first so that an early return below cannot leave
        # the sets of a previous run behind
        self.foundPatternsSet = set()
        self.notUsedSpritesNameSet = set()

        print('Processing sprite names...')
        self.spriteNameSet = self.getSpriteNamesSet(self.input_sprite_path)

        if not self.spriteNameSet:
            print('No sprites found, stopping.')
            return

        print('Checking which sprite is not used...')
        for spriteName in self.spriteNameSet:
            if self.searchInDir(self.input_gamelevels_path, spriteName):
                self.foundPatternsSet.add(spriteName)

        self.notUsedSpritesNameSet = self.spriteNameSet.difference(self.foundPatternsSet)

    def printResults(self):
        """ Prints the processing results. If nothing was processed, only the header is printed """
        print('Not used sprites: ')

        for spriteName in self.notUsedSpritesNameSet:
            # two spaces after the dashes: same output as the former
            # "print '--- ' , spriteName" statement
            print('---  %s' % (spriteName,))

    def _walkFiles(self, dir):
        """ Yields the path of every file below dir, skipping the directories
            whose name is in ignoreDirSet.
        """
        for (dirpath, dirnames, filenames) in os.walk(dir):
            # Prune with a slice assignment: os.walk only honours changes made
            # to this very list object, and rebuilding it avoids removing items
            # from the list while iterating over it (which skips entries).
            dirnames[:] = [name for name in dirnames if name not in self.ignoreDirSet]

            for filename in filenames:
                yield os.path.join(dirpath, filename)

    def getSpriteNamesSet(self, dir):
        """ Gets the names of the sprites in a given dir. Note that this function
            assumes that all names are UNIQUE, since a set is being used for storing the names

            Params:
                - dir: directory where to start looking for sprites

            Returns: set with the names (file name without '.sprite') of the sprites found
        """

        spritesSet = set()

        for path in self._walkFiles(dir):
            filename = os.path.basename(path)
            if filename.endswith('.sprite'):
                spritesSet.add(os.path.splitext(filename)[0])

        return spritesSet

    def searchInDir(self, dir, pattern):
        """ Walks the given directory recursively searching for the given pattern in the files.

            Only files whose extension is in fileExtSet are searched.

            Params:
                - dir: directory to search recursively
                - pattern: whole word to search in the files of the directory

            Returns: True if pattern is found, False otherwise.
        """

        for path in self._walkFiles(dir):
            if os.path.splitext(path)[1] not in self.fileExtSet:
                continue
            if self.searchInText(path, pattern):
                return True  # found the pattern, so stop searching

        return False

    def searchInText(self, file, pattern):
        """ Searches a pattern as a whole word (case-sensitive) in the given file.

            The set of words of each file is cached in textSetCache, so a file
            is read from disk only the first time it is searched.

            Returns: True if found, False otherwise
        """

        gamefile_set = self.textSetCache.get(file)

        if gamefile_set is None:
            # 'with' closes the file even if reading it fails
            with open(file, 'r') as fileHandler:
                gamefile_text = fileHandler.read()

            # set of all the whole words of the text
            gamefile_set = set(re.findall(r"\b\w+\b", gamefile_text))
            self.textSetCache[file] = gamefile_set

        return pattern in gamefile_set

def printUsage ():
    """ Prints the command line usage of the script to standard output. """
    # '%prog' is a placeholder that only optparse expands; printed directly it
    # would show up literally, so the script name is substituted here instead.
    progName = os.path.basename(sys.argv[0]) if sys.argv and sys.argv[0] else '<script>'

    print('')
    print('usage: %s input_path game_levels_path' % (progName,))
    print('')
    print('Lists the sprites from a given folder that are not used in the game levels in the given folder')
    print('  input_path           path to the input folder without quotation marks')
    print('  game_levels_path     path to the game levels folder without quotation marks')

if __name__ == '__main__':
    # Command line entry point:
    #   <script> input_path game_levels_path
    # Exits with status 2 when the arguments are missing or a path does not exist.
    # sys.exit() is used rather than the bare exit() helper, which is only
    # injected by the site module and is not available in every interpreter setup.

    if len(sys.argv[1:]) != 2:
        print('Invalid number of arguments')
        printUsage()
        sys.exit(2)

    input_sprite_path = sys.argv[1]
    input_gamelevels_path = sys.argv[2]

    # The messages below keep the exact spacing produced by the former
    # comma-separated print statements.
    if (not os.path.exists(input_sprite_path)):
        print('ERROR: input folder < %s > does not exist' % (input_sprite_path,))
        printUsage()
        sys.exit(2)

    if (not os.path.exists(input_gamelevels_path)):
        print('ERROR: game folder < %s > does not exist' % (input_gamelevels_path,))
        printUsage()
        sys.exit(2)

    spriteLister = UsedSpritesLister()
    spriteLister.input_sprite_path = input_sprite_path
    spriteLister.input_gamelevels_path = input_gamelevels_path

    print('Input dir:  %s' % (input_sprite_path,))
    print('Game levels dir:  %s' % (input_gamelevels_path,))
    print('Processing...')

    spriteLister.process()
    spriteLister.printResults()

    print('Finished')
        


No hay comentarios:

Publicar un comentario