Let’s take handling any encoding of files one step further.
We need to look for specific text in files in a directory, regardless of encoding. Here is one way in Python.
#! /usr/bin/python
import sys
import os.path
import os
import re
import fnmatch
def DecodeBytes(byteArray, codecs=('utf-8', 'utf-16')):
    """Decode raw bytes using the first codec that succeeds.

    Args:
        byteArray: The bytes object to decode.
        codecs: Codec names to try, in the given order.

    Returns:
        The text produced by the first codec that decodes without error,
        or None when every codec fails (or no codecs are given).
    """
    for codec in codecs:
        try:
            return byteArray.decode(codec)
        except (UnicodeError, LookupError):
            # UnicodeError: the bytes are not valid for this codec.
            # LookupError: the codec name is unknown.
            # Either way this is a best-effort probe, so try the next codec.
            continue
    return None
def ReadLinesFromFile(filename):
    """Read a file as raw bytes, decode it, and return its lines.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of lines without their line terminators, or None when
        DecodeBytes could not decode the file's content.
    """
    # Open in binary mode so no decoding happens here; the context manager
    # closes the handle even if the read raises.
    with open(filename, "rb") as handle:
        rawbytes = handle.read()
    content = DecodeBytes(rawbytes)
    if content is None:
        return None
    # Split on any newline convention (CRLF, CR, LF) rather than only the
    # host platform's os.linesep, so files written on another OS still
    # break into lines correctly.
    return re.split(r"\r\n|\r|\n", content)
# this came from http://stackoverflow.com/questions/1863236/grep-r-in-python
# with a substitution of ReadLinesFromFile and a file name match filter
def RecursiveGrep(pattern, dir, match):
    """Search files under a directory tree for lines matching a regex.

    Args:
        pattern: Regular expression searched for in each line.
        dir: Root directory; it is walked with os.walk.
        match: fnmatch-style file name filter (for example "*.cs").

    Yields:
        One string per matching line, formatted as
        "<file path>|<zero-based line index>|<stripped line text>".
    """
    matcher = re.compile(pattern)
    for root, _subdirs, names in os.walk(dir):
        for name in fnmatch.filter(names, match):
            path = os.path.join(root, name)
            if not os.path.isfile(path):
                continue
            content = ReadLinesFromFile(path)
            # None means the file could not be decoded; skip it.
            if content is None:
                continue
            for number, text in enumerate(content):
                if matcher.search(text):
                    yield "|".join((path, str(number), text.strip()))
# Raw string keeps the backslash of the Windows-style path literal; "\y" is
# not a valid escape sequence. RecursiveGrep is a generator, so results are
# only produced as `lines` is iterated.
lines = RecursiveGrep("needle", r"\yourpath", "*.cs")
This will recurse through all subdirectories, looking in all .cs files for "needle", and return the data in this format (pipe separated):
full file path|line number|line content
Very useful on Windows with multilingual files.