xref.py

Go to the documentation of this file.
00001 ##
00002 # 
00003 # Implements the Xref function which will generate a dictionary of the
00004 # tokens separated by whitespace and punctuation in a text file.  The
00005 # contents of the dictionary are the line numbers (1-based) the tokens
00006 # appear on.
00007 # 
00008 # Copyright (C) 2002 GDS Software
00009 # 
00010 # This program is free software; you can redistribute it and/or
00011 # modify it under the terms of the GNU General Public License as
00012 # published by the Free Software Foundation; either version 2 of
00013 # the License, or (at your option) any later version.
00014 # 
00015 # This program is distributed in the hope that it will be useful,
00016 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018 # GNU General Public License for more details.
00019 # 
00020 # You should have received a copy of the GNU General Public
00021 # License along with this program; if not, write to the Free
00022 # Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
00023 # MA  02111-1307  USA
00024 # 
00025 # See http://www.gnu.org/licenses/licenses.html for more details.
00026 # 
00027 
00028 import re
__version__ = "$Id: xref.py,v 1.3 2002/08/21 12:41:49 donp Exp $"

# Matches one token-separating character: any whitespace or one of the
# listed punctuation marks.  The underscore is deliberately absent, so
# identifiers such as snake_case stay in one piece.  Written as a raw
# string so that the regex escapes are not mistaken for (invalid) string
# escapes.
punctuation_reG = re.compile(r"""[/"'.,?\s<>\[\]{}:;|\\~`!@#$%^&*()\-=+]""")


def Xref(filename, preserve_case=0):
    """Build a cross reference of the tokens in a text file.

    The file is read line by line; every whitespace and punctuation
    character (see punctuation_reG) is treated as a token separator.

    Parameters:
        filename       Path of the text file to read.
        preserve_case  If false (the default), tokens are lower-cased
                       before being recorded; if true, they are recorded
                       exactly as they appear.

    Returns:
        A dictionary mapping each token to the list of 1-based line
        numbers it appears on.  Each list is in increasing order and
        contains a given line number at most once.

    Raises:
        IOError  If the file cannot be opened or read.
    """
    try:
        with open(filename, "r") as fp:
            lines = fp.readlines()
    except (IOError, OSError):
        raise IOError("Couldn't read input file \"%s\"" % filename)

    index = {}
    for line_num, text in enumerate(lines, 1):
        # Turn every separator into a space, then normalise the case.
        text = punctuation_reG.sub(" ", text)
        if not preserve_case:
            text = text.lower()
        # Only spaces remain as whitespace at this point, so split()
        # yields exactly the non-empty tokens.
        for word in text.split():
            line_nums = index.setdefault(word, [])
            # Lines are visited in increasing order, so a repeat of the
            # current line number can only be the last entry.
            if not line_nums or line_nums[-1] != line_num:
                line_nums.append(line_num)
    return index
00064 
def format_xref(words):
    """Format a cross-reference dictionary as sorted report lines.

    Parameters:
        words  Dictionary mapping each token to a list of integer line
               numbers.

    Returns:
        A sorted list of strings, one per token, of the form
        "token: 1,5,9".  Tokens are left-justified and padded to the
        length of the longest token so that the colons line up.
    """
    # Width of the token column is the length of the longest token.
    if words:
        width = max(len(word) for word in words)
    else:
        width = 0
    rows = [
        "%-*s: %s" % (width, word, ",".join("%d" % n for n in line_nums))
        for word, line_nums in words.items()
    ]
    # Sort the finished lines (not the bare tokens).
    rows.sort()
    return rows


if __name__ == '__main__':
    import sys
    if len(sys.argv) != 2:
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function on Python 3.
        print("Usage:  xref file")
        sys.exit(1)
    # The command-line tool always preserves the case of the tokens.
    for row in format_xref(Xref(sys.argv[1], 1)):
        print(row)
00089 
00090 

© Copyright 2008-2009 Vyper Logix Corp., All Right Reserved; If you reference this document or any part of this document you must use the citation verbatim (including the link) "© Copyright 2008-2009 Vyper Logix Corp., All Right Reserved."

Notice: This source code contained in this document is NOT open source and is NOT being distributed as open source.

122,241 lines of code and growing...