##
#
# Implements the Xref function which will generate a dictionary of the
# tokens separated by whitespace and punctuation in a text file.  The
# contents of the dictionary are the line numbers (1-based) the tokens
# appear on.
#
# Copyright (C) 2002 GDS Software
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307 USA
#
# See http://www.gnu.org/licenses/licenses.html for more details.
#

import re
import sys

__version__ = "$Id: xref.py,v 1.3 2002/08/21 12:41:49 donp Exp $"

# Every character treated as a token separator: ASCII punctuation plus
# any whitespace.  The underscore is deliberately absent, so identifiers
# such as foo_bar stay in one piece.
punctuation_reG = re.compile(r"""[/"'.,?\s<>\[\]{}:;|\\~`!@#$%^&*()\-=+]""")


def Xref(filename, preserve_case=0):
    """Build a cross-reference of the tokens found in a text file.

    Each line is tokenized by turning every character matched by
    ``punctuation_reG`` (punctuation and whitespace) into a space and
    splitting on the resulting runs of spaces.

    Args:
        filename: Path of the text file to read.
        preserve_case: When false (the default) tokens are lower-cased
            before being recorded; when true they are kept as written.

    Returns:
        A dictionary mapping each token to the ascending list of
        1-based line numbers it appears on.  A line number is listed
        once per token even if the token occurs several times on it.

    Raises:
        IOError: If the file cannot be opened, read or decoded.
    """
    try:
        with open(filename, "r") as fp:
            lines = fp.readlines()
    except (IOError, OSError, UnicodeError):
        raise IOError("Couldn't read input file \"%s\"" % filename)

    xref = {}
    for line_num, raw_line in enumerate(lines, 1):
        # Convert all punctuation (and whitespace) to spaces.
        line = punctuation_reG.sub(" ", raw_line)
        if not preserve_case:
            line = line.lower()
        # str.split() drops empty fields, so runs of separators and
        # blank lines yield no tokens.
        for word in line.split():
            line_nums = xref.setdefault(word, [])
            # Lines are visited in ascending order, so a repeat on the
            # current line can only be the last entry.
            if not line_nums or line_nums[-1] != line_num:
                line_nums.append(line_num)
    return xref


def _main(argv):
    """Print a sorted, column-aligned cross-reference of the file in argv[1]."""
    if len(argv) != 2:
        print("Usage: xref file")
        sys.exit(1)
    words = Xref(argv[1], 1)
    if not words:
        return
    # Pad every token to the width of the longest one so that the
    # line-number lists line up.
    width = max([len(word) for word in words])
    rows = []
    for word in words:
        numbers = ",".join([str(line_num) for line_num in words[word]])
        rows.append("%-*s: %s" % (width, word, numbers))
    rows.sort()
    for row in rows:
        print(row)


if __name__ == '__main__':
    _main(sys.argv)
© Copyright 2008-2009 Vyper Logix Corp., All Right Reserved; If you reference this document or any part of this document you must use the citation verbatim (including the link) "© Copyright 2008-2009 Vyper Logix Corp., All Right Reserved."
Notice: This source code contained in this document is NOT open source and is NOT being distributed as open source.
122,241 lines of code and growing...