Yay! Programmer art!
Yay! Programmer art!

Python grep Script with Multiple Files

The other day I was looking for a command-line grep I could use on Windows. There were several options, so I picked a Python script that I found on Wikipedia (I chose a script since I have a healthy paranoia about spyware and such).

Overall, the script had enough functionality for my purposes with one exception. It didn’t accept wildcards for multiple files. It was only a few lines of code so I went ahead and added glob functionality and it seems to work fine. In case it is useful to anyone else, here is the altered script. Thanks Vivian De Smedt for the original.

grep.py


"""
Usage: grep [OPTION]... PATTERN [FILE] ...
Search for PATTERN in each FILE or standard input.
Example: grep -i 'hello world' menu.h main.c
 
Regexp selection and interpretation:
  -E, --extended-regexp     PATTERN is an extended regular expression
  -F, --fixed-strings       PATTERN is a set of newline-separated strings
  -G, --basic-regexp        PATTERN is a basic regular expression
  -e, --regexp=PATTERN      use PATTERN as a regular expression
  -f, --file=FILE           obtain PATTERN from FILE
  -w, --word-regexp         force PATTERN to match only whole words
  -x, --line-regexp         force PATTERN to match only whole lines
  -z, --null-data           a data line ends in 0 byte, not newline
 
Miscellaneous:
  -s, --no-messages         suppress error messages
  -v, --invert-match        select non-matching lines
      --mmap                use memory-mapped input if possible
 
Output control:
  -b, --byte-offset         print the byte offset with output lines
  -H, --with-filename       print the filename for each match
  -h, --no-filename         suppress the prefixing filename on output
  -q, --quiet, --silent     suppress all normal output
      --binary-files=TYPE   assume that binary files are TYPE
                            TYPE is 'binary', 'text', or 'without-match'.
  -a, --text                equivalent to --binary-files=text
  -I                        equivalent to --binary-files=without-match
  -d, --directories=ACTION  how to handle directories
                            ACTION is 'read', 'recurse', or 'skip'.
  -r, --recursive           equivalent to --directories=recurse.
  -L, --files-without-match only print FILE names containing no match
  -l, --files-with-matches  only print FILE names containing matches
  -c, --count               only print a count of matching lines per FILE
  -Z, --null                print 0 byte after FILE name
 
Context control:
  -NUM                      same as --context=NUM
  -U, --binary              do not strip CR characters at EOL (MSDOS)
  -u, --unix-byte-offsets   report offsets as if CRs were not there (MSDOS)
 
`egrep' means `grep -E'.  `fgrep' means `grep -F'.
With no FILE, or when FILE is -, read standard input.  If less than
two FILEs given, assume -h.  Exit status is 0 if match, 1 if no match,
and 2 if trouble.
 
Report bugs to <bug-gnu-utils@gnu.org>."""
 
import sys, re, getopt, glob
 
class Queue:
  """Bounded first-in/first-out buffer holding the newest `size` values.

  A size of 0 turns append() into a no-op, so the buffer stays empty.
  """

  def __init__(self, size):
    self.size = size
    self.values = []

  def append(self, value):
    """Store value, discarding the oldest entries beyond the capacity."""
    if not self.size:
      return
    self.values.append(value)
    while len(self.values) > self.size:
      del self.values[0]

  def pop(self):
    """Remove and return the oldest value, or None when empty."""
    if not self.values:
      return None
    return self.values.pop(0)

  def __len__(self):
    return len(self.values)

  def __getitem__(self, n):
    # Plain list indexing, so negative offsets count back from the newest.
    return self.values[n]
 
def grep(pattern, f, options):
  """Write the lines of f that match pattern to standard output.

  pattern is a regular expression string, f is an object providing
  readline(), and options supplies ignore_case, before_context,
  after_context and line_number.  With line numbers enabled, matching
  lines are prefixed "N:" and context lines "N-".  When context is
  enabled, a "--" line separates groups that are not adjacent.
  Returns None.
  """
  flags = 0
  if options.ignore_case:
    flags = re.I
  regex = re.compile(pattern, flags)

  before = options.before_context
  out = sys.stdout

  history = Queue(before)  # most recent lines, kept for leading context
  current = 0              # 1-based number of the line being examined
  first_wanted = -1        # first line of the window around the last match
  last_wanted = -1         # last line of that window
  printed_upto = 0         # number of the last line written (0: none yet)

  while True:
    text = f.readline()
    if not text:
      break
    current += 1

    matched = regex.search(text) is not None
    if matched:
      first_wanted = current - before
      last_wanted = current + options.after_context

    if current <= last_wanted:
      if before or options.after_context:
        # A gap since the previously written line starts a new group.
        if printed_upto and first_wanted > printed_upto + 1:
          out.write("--\n")

      # Never write again a line that has already been written.
      first_wanted = max(first_wanted, printed_upto + 1)
      for offset in range(first_wanted - current, 0):
        if options.line_number:
          out.write("%d-" % (offset + current))
        out.write(history[offset])

      if options.line_number:
        if matched:
          out.write("%d:" % current)
        else:
          out.write("%d-" % current)
      out.write(text)
      printed_upto = current
    history.append(text)
 
def printUsage():
  """Write the usage line and the summary of supported options to stdout.

  This is the text shown for --help and when no pattern is given.
  """
  option_help = """
  -i, --ignore-case         ignore case distinctions
  -B, --before-context=NUM  print NUM lines of leading context
  -A, --after-context=NUM   print NUM lines of trailing context
  -C, --context[=NUM]       print NUM (default 2) lines of output context
                            unless overridden by -A or -B
  -n, --line-number         print line number with output lines
  -V, --version             print version information and exit
      --help                display this help and exit
 
See http://www.vdesmedt.com/~vds2212/grep.html for informations and updates.
Send an email to vivian@vdesmedt.com for comments and bug reports."""
  # sys.stdout.write (already used by grep) runs on Python 2 and 3 alike,
  # whereas the print statement is a syntax error on Python 3.
  sys.stdout.write("usage: grep.py [options] pattern [file]" + "\n")
  sys.stdout.write(option_help + "\n")
 
def printVersion():
  """Write the grep.py version banner to standard output."""
  # sys.stdout.write runs on Python 2 and 3 alike; the print statement
  # is a syntax error on Python 3.
  sys.stdout.write("grep.py version 0.5.0\n")
 
class Options:
  """Settings parsed from the command line; everything starts switched off."""

  def __init__(self):
    # Flags: case-insensitive matching and line-number prefixes.
    self.ignore_case = self.line_number = 0
    # Number of context lines to show before and after each match.
    self.before_context = self.after_context = 0
 
def main(argv):
  """Parse the command line and search the requested inputs.

  argv is the argument list without the program name.  The first
  positional argument is the pattern.  Every following argument is a
  file name or wildcard pattern, expanded with glob so wildcards work
  even when the shell does not expand them.  With no file argument,
  standard input is searched.

  Returns 0 after --help or --version, 1 when no pattern is given and 2
  when a file argument matches no existing file.  Otherwise returns
  whatever grep() returned for the last input searched.
  """
  options = Options()

  opts, args = getopt.getopt(
      argv, "ViA:B:C:n",
      ["help", "version", "ignore-case", "before-context=",
       "after-context=", "context=", "line-number"])
  for o, v in opts:
    if o in ("-i", "--ignore-case"):
      options.ignore_case = 1
    elif o in ("-A", "--after-context"):
      options.after_context = int(v)
    elif o in ("-B", "--before-context"):
      options.before_context = int(v)
    elif o in ("-C", "--context"):
      if not v:
        v = 2
      options.after_context = int(v)
      options.before_context = int(v)
    elif o in ("-n", "--line-number"):
      options.line_number = 1
    elif o in ("-V", "--version"):
      printVersion()
      return 0
    elif o == "--help":
      printUsage()
      return 0

  if not args:
    printUsage()
    return 1

  pattern = args[0]

  if len(args) == 1:
    return grep(pattern, sys.stdin, options)

  result = None
  trouble = False
  # Expand every file argument, not only the first one.
  for fileArg in args[1:]:
    fileList = glob.glob(fileArg)
    if not fileList:
      # Report the problem instead of failing later on an unset result.
      sys.stderr.write("grep.py: %s: No such file or directory\n" % fileArg)
      trouble = True
      continue
    for fileName in fileList:
      f = open(fileName)
      try:
        sys.stdout.write(fileName + " : " + "\n")
        result = grep(pattern, f, options)
      finally:
        # Close the file even when grep() raises.
        f.close()

  if trouble:
    return 2
  return result
 
# Script entry point: hand the arguments (minus the program name) to main()
# and use its return value as the process exit status.
if __name__ == "__main__":
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
 

2 Comments

  1. tyler
    Posted January 13, 2008 at 2:49 am | Permalink

    what does this script do?

  2. Posted January 13, 2008 at 3:26 pm | Permalink

    It is a basic command-line ‘grep’ utility similar to the Linux grep.

    http://en.wikipedia.org/wiki/Grep

Post a Comment

Your email is never published nor shared. Required fields are marked *
*
*

For spam detection purposes, please copy the number 6851 + 1 to the field below: