#!/usr/bin/python
"""This is a small program written with the python fiwalk framework to
break the Microsoft executables from the M57 corpus. It does this by changing
bytes in the first 4096 bytes of the executable that are at or above hex 7E
('~') to hex FF."""
import os.path,sys
from subprocess import Popen,call,PIPE
sys.path.append(os.getenv("DOMEX_HOME") + "/src/lib/") # add the library
sys.path.append(os.getenv("DOMEX_HOME") + "/src/fiwalk/python/") # add the library
import fiwalk,hashlib
import xml.parsers.expat
# Lower-case file extensions that make a file a candidate for redaction.
redact_extensions = set((".dll", ".exe", ".com"))

# File names that are always redacted; the script's main section fills this
# set from previously written redaction reports.
redact_filenames = set()

# Upper bound on the number of bytes rewritten at the start of a file's
# first byte run.
redact_max_size = 4096
def should_redact(fi):
if fi.filename() in redact_filenames: return True
fnl = fi.filename().lower()
(root,ext) = os.path.splitext(fnl)
if options.debug: print "\r",fnl,
if ext in redact_extensions and fnl.startswith("windows"):
try:
content = fi.contents(icat_fallback=False)
except ValueError:
if options.debug: print " *** can't redact --- is compressed *** "
return False
if not content:
if options.debug: print " *** can't redact --- no content ***"
return False
if "Microsoft" in content:
return True
if "\0M\0i\0c\0r\0o\0s\0o\0f\0t" in content:
return True
if options.debug: print " *** won't redact --- no Microsoft ***"
return False
return False
def redact(fi):
    """Redact the start of file object `fi` and log the result to `xml_out`.

    Files rejected by should_redact() are ignored.  Otherwise up to
    `redact_max_size` bytes are read from the start of the file's first byte
    run in the image, every byte at or above '~' (0x7E) is replaced by 0xFF,
    and -- only when `options.commit` is set -- the modified bytes are written
    back to the image at the same offset.  A record holding the file name,
    original length, inode, image offset, number of bytes processed and the
    MD5/SHA1 digests of the contents before and after is appended to the
    module-level `xml_out` file.

    Uses the module globals `options` and `xml_out`, which the script's main
    section assigns before this callback is invoked.
    """
    from xml.sax.saxutils import escape
    global xml_out,options
    if not should_redact(fi): return

    # Get the first byterun; a file without any byte runs has nothing on
    # disk that could be rewritten.
    runs = fi.byteruns()
    if not runs: return
    br = runs[0]
    if br.img_offset==0: return     # this run isn't on the disk
    if br.bytes==0: return          # too small to redact

    content = fi.contents()         # before redaction

    # Read the data that will be redacted.
    redact_bytes = min(redact_max_size,br.bytes)
    fi.imagefile.seek(br.img_offset)
    sector = fi.imagefile.read(redact_bytes)

    def redact_function(ch):
        # NOTE(review): the cutoff is '~' (0x7E), so '~' and DEL are replaced
        # too; confirm that this is intended rather than 0x80.
        if ch<'~': return ch
        # Must be the single byte 0xFF.  The four-character text '0xff' would
        # lengthen the buffer and overwrite data past the redacted region.
        return '\xff'

    sector = "".join(map(redact_function,sector))

    # Now write it back
    if options.commit:
        fi.imagefile.seek(br.img_offset)
        fi.imagefile.write(sector)

    # Without --commit nothing was written, so this equals `content`.
    redacted_content = fi.contents() # after redaction

    # NOTE(review): the strings below contain no element markup as seen in
    # this file; confirm whether tags were lost from these literals.
    xml_out.write("\n%s\n" % (escape(fi.filename())))
    xml_out.write(" %d\n" % (len(content)))
    xml_out.write(" %s\n" % (fi.inode()))
    xml_out.write(" %d\n" % (br.img_offset))
    xml_out.write(" %d\n" % (redact_bytes))
    xml_out.write(" \n")
    xml_out.write(" %s\n" % (hashlib.md5(content).hexdigest()))
    xml_out.write(" %s\n" % (hashlib.sha1(content).hexdigest()))
    xml_out.write(" \n")
    xml_out.write(" \n")
    xml_out.write(" %s\n" % (hashlib.md5(redacted_content).hexdigest()))
    xml_out.write(" %s\n" % (hashlib.sha1(redacted_content).hexdigest()))
    xml_out.write(" \n")
    xml_out.write("\n")
if __name__=="__main__":
    # Script entry point: parse the options, load the file names listed in
    # earlier redaction reports, then walk every requested image with
    # redact() as the per-file callback.  `options` and `xml_out` are
    # assigned here at module level because should_redact() and redact()
    # read them as globals.
    import sys,time
    from optparse import OptionParser
    from subprocess import Popen,PIPE
    from glob import glob

    parser = OptionParser()
    parser.usage = "%prog [options] imagefile"
    parser.add_option("-d","--debug",help="prints debugging info",dest="debug",action="store_true")
    parser.add_option("-c","--commit",help="Really do the redaction",action="store_true")
    parser.add_option("--all",help="Do all",action="store_true")
    (options,args) = parser.parse_args()

    # First read all of the redaction files
    for fn in glob("*redacted.xml*"):
        report = open(fn)
        try:
            fiwalk.fiwalk_using_sax(xmlfile=report,callback=lambda fi:redact_filenames.add(fi.filename()))
        except xml.parsers.expat.ExpatError:
            print("Invalid XML file: %s" % fn)
        finally:
            report.close()          # do not leak one handle per report
    print("number of filenames in redaction XML: %d" % len(redact_filenames))

    if options.all:
        # Convert every AFF image in the current directory that has no raw
        # counterpart yet, then process all raw images found there.
        for fn in glob("*.aff"):
            raw = fn.replace(".aff",".raw")
            if not os.path.exists(raw):
                print("%s --> %s" % (fn,raw))
                if call(['afconvert','-e','raw',fn])!=0:
                    raise RuntimeError("afconvert of %s failed" % fn)
        fns = glob("*.raw")
    else:
        fns = args

    for fn in fns:
        if fn.endswith(".aff"):
            raise ValueError("Cannot redact AFF files")
        print("Redacting %s" % fn)

        # Derive the report name from the image name without its extension.
        # A plain str.replace(".raw", ...) returns the image name unchanged
        # when ".raw" is absent, and opening that for writing would truncate
        # the image itself.
        report_name = os.path.splitext(fn)[0] + "-redacted.xml"

        mode = "rb"
        if options.commit: mode="r+b"

        # Open the image being iterated (not args[0]), so that several
        # images and the --all mode each process their own file.
        imagefile = open(fn,mode)
        try:
            xml_out = open(report_name,"w")
            try:
                # NOTE(review): these header/footer strings contain no markup
                # as seen in this file; confirm whether tags were lost.
                xml_out.write("\n")
                xml_out.write("\n")
                fiwalk.fiwalk_using_sax(imagefile=imagefile,callback=redact)
                xml_out.write("\n")
            finally:
                xml_out.close()     # flush the report even if the walk fails
        finally:
            imagefile.close()