This site is the archived OWASP Foundation Wiki and is no longer accepting Account Requests.
To view the new OWASP Foundation website, please visit https://owasp.org
Jython Script to extract metadata from Java class files (O2P)
From OWASP
Revision as of 16:44, 4 September 2009 by Dinis.cruz (talk | contribs)
This is the current code, executed with Jython, that creates an XML file containing the Java metadata information. Note that this was my (Dinis) first ever Python script, so I was busking it all the way (while bastardizing John's code :) )
import sys
from javassist.bytecode import *
from java.io import *
#<begin of Dinis Changes>
import java
import os
' global vars'
# Java-style boolean aliases; the helper functions in this file pass them as
# the flag arguments of createXmlElement().
true = 1
false = 0
def _escapeXml(text, escapeQuotes):
    '''Return text with the XML special characters replaced by entities.

    text         : str  - raw text to escape
    escapeQuotes : bool - also replace double quotes (needed inside attribute
                          values, which this file delimits with double quotes)
    returns      : str  - the escaped text
    '''
    # '&' has to be replaced first: doing it last would escape the '&' of the
    # entities produced by the other replacements a second time.
    text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    if escapeQuotes:
        text = text.replace("\"", "&quot;")
    return text


def createXmlElement(name, parameters, innerXml,indent,allContentInSameLine, encodeInnerXml) :
    '''Build the text of one XML element, starting with a newline and indent tabs.

    name                 : str  - element name
    parameters           : dict - attribute name -> string value; an empty
                                  list is accepted too and means "no attributes"
    innerXml             : str  - content placed between the tags; "" yields a
                                  self-closing element
    indent               : int  - number of tab characters put before the tag
    allContentInSameLine : bool - when true the closing tag directly follows
                                  the content, otherwise it goes on its own
                                  indented line
    encodeInnerXml       : bool - when true innerXml is XML-escaped first
    returns              : str  - the element text
    '''
    if encodeInnerXml:
        innerXml = _escapeXml(innerXml, False)

    # Attributes are rendered as ' name = "value"', in the iteration order of
    # the parameters container.
    renderedParameters = []
    for parameterName in parameters:
        parameterValue = _escapeXml(parameters[parameterName], True)
        renderedParameters.append(" %s = \"%s\"" % (parameterName, parameterValue))

    tabs = "\t" * indent
    beginTagValue = name + "".join(renderedParameters)

    if innerXml == "":
        return "\n%s<%s/>" % (tabs, beginTagValue)
    if allContentInSameLine:
        return "\n%s<%s>%s</%s>" % (tabs, beginTagValue, innerXml, name)
    return "\n%s<%s>%s%s\n%s</%s>" % (tabs, beginTagValue, "\t" * (indent + 1), innerXml, tabs, name)
def getAnnotationInfo(xmlNodeName,annotations):
    '''Render a sequence of annotations as XML elements named xmlNodeName.

    xmlNodeName : str      - element name used for every annotation
    annotations : iterable - annotation objects; a None entry is rendered as
                             an empty element. None instead of a sequence
                             (which AttributeParser's getters return after a
                             failure) is treated as "no annotations".
    returns     : str      - the concatenated XML text ("" when there is
                             nothing to render)
    '''
    if annotations is None:
        return ""

    renderedAnnotations = []
    for annotation in annotations:
        if annotation is None:
            renderedAnnotations.append(createXmlElement(xmlNodeName, {}, "", 3, false, false))
            continue

        annotationParams = {"typeName": annotation.typeName, "toString": annotation.toString()}

        # Each member of the annotation becomes a nested <member> element.
        renderedMembers = []
        memberNames = annotation.memberNames
        if memberNames is not None:
            for memberName in memberNames:
                memberParams = {
                    "memberName": memberName,
                    "memberValue": annotation.getMemberValue(memberName).toString(),
                }
                renderedMembers.append(createXmlElement("member", memberParams, "", 4, true, true))

        renderedAnnotations.append(
            createXmlElement(xmlNodeName, annotationParams, "".join(renderedMembers), 3, false, false))
    return "".join(renderedAnnotations)
def getAttributeInfo(attributes):
    '''Render a sequence of class-file attributes as <attribute> XML elements.

    attributes : iterable - attribute objects; a None entry is rendered as an
                            empty <attribute/> element. None instead of a
                            sequence (which AttributeParser's getters return
                            after a failure) is treated as "no attributes".
    returns    : str      - the concatenated XML text ("" when there is
                            nothing to render)
    '''
    if attributes is None:
        return ""

    renderedAttributes = []
    for attribute in attributes:
        if attribute is None:
            # The original accumulated into an undefined name here and so
            # raised as soon as a None entry was met.
            renderedAttributes.append(createXmlElement("attribute", {}, "", 4, false, false))
            continue

        attributeParams = {
            "toString": attribute.toString(),
            "typeName": java.lang.Class.getName(attribute.getClass()),
            "name": attribute.name,
        }
        # Attributes exposing numAnnotations carry annotations, which are
        # added individually as child elements.
        if hasattr(attribute, "numAnnotations"):
            attributeAnnotations = getAnnotationInfo("annotation", attribute.getAnnotations())
            renderedAttributes.append(
                createXmlElement("attribute", attributeParams, attributeAnnotations, 2, false, false))
        else:
            renderedAttributes.append(createXmlElement("attribute", attributeParams, "", 2, true, true))
    return "".join(renderedAttributes)
def getMethodsCalled(method):
    '''Render the calls made by method as <methodCalled> XML elements.

    The method's bytecode is walked instruction by instruction and one element
    is produced for every instruction whose (signed) opcode byte is -74, -73
    or -71, i.e. 0xB6 invokevirtual, 0xB7 invokespecial and 0xB9
    invokeinterface in the JVM instruction set.
    NOTE(review): 0xB8 invokestatic (-72) is not matched, so static calls are
    not reported - confirm whether that is intentional.

    method  : object exposing getCodeAttribute() and getLineNumber(pos)
              (presumably a javassist MethodInfo - verify against caller)
    returns : str - the concatenated XML text, "" when the method has no code
    '''
    codeAttribute = method.getCodeAttribute()
    if codeAttribute is None:
        return ""

    constPool = codeAttribute.getConstPool()
    collected = ""
    positions = method.getCodeAttribute().iterator()
    while positions.hasNext():
        pos = positions.next()
        opcode = codeAttribute.getCode()[pos]

        if opcode == -73 or opcode == -74:    # method call
            isInterfaceCall = False
        elif opcode == -71:                   # interface call
            isInterfaceCall = True
        else:
            continue

        # The two bytes after the opcode hold the constant pool index, high
        # byte first; masking converts the signed byte values to unsigned.
        poolIndex = 0x000000FF & codeAttribute.getCode()[pos + 2]
        poolIndex += (0x000000FF & codeAttribute.getCode()[pos + 1]) * 0x100

        if isInterfaceCall:
            calledClass = constPool.getInterfaceMethodrefClassName(poolIndex)
            calledName = constPool.getInterfaceMethodrefName(poolIndex)
            nameAndType = constPool.getInterfaceMethodrefNameAndType(poolIndex)
        else:
            calledClass = constPool.getMethodrefClassName(poolIndex)
            calledName = constPool.getMethodrefName(poolIndex)
            nameAndType = constPool.getMethodrefNameAndType(poolIndex)
        calledDescriptor = constPool.getUtf8Info(constPool.getNameAndTypeDescriptor(nameAndType))
        calledLineNumber = str(method.getLineNumber(pos))

        xmlElementAttributes = {
            "lineNumber": calledLineNumber,
            "class": calledClass,
            "name": calledName,
            "descriptor": calledDescriptor,
            "signature": calledClass + "." + calledName + calledDescriptor,
        }
        collected += createXmlElement("methodCalled", xmlElementAttributes, "", 5, True, True)
    return collected
def getSuperClassAndInterfaces(classObject):
    '''Render the superclass and the interfaces of classObject as XML.

    classObject : object exposing getSuperclass() and getInterfaces()
    returns     : str - one <superclass> element followed by one <interface>
                        element per entry of getInterfaces()
    '''
    elements = [createXmlElement("superclass", {"name": classObject.getSuperclass()}, "", 2, true, false)]
    for interfaceName in classObject.getInterfaces():
        elements.append(createXmlElement("interface", {"name": interfaceName}, "", 2, true, false))
    return "".join(elements)
def getClassinfo(path, attributeParser):
    '''Build the <class> XML node describing the class file at path.

    path            : str - path to the .class file
    attributeParser : AttributeParser used to load and query the class file
    returns         : (str, object) - the <class> element text and the class
                      object returned by attributeParser._acquireClassFile(path)
    '''
    classObject = attributeParser._acquireClassFile(path)

    # Children appear in this order: class attributes, superclass and
    # interfaces, then the methods.
    childNodes = [
        getAttributeInfo(attributeParser.getClassAttributes(path)),
        getSuperClassAndInterfaces(classObject),
        getMethodsinfo(path, attributeParser),
    ]

    # sourceFile is the package rendered as a backslash-separated folder,
    # followed by the source file name when the class file records one.
    packageFolder = os.path.dirname(classObject.getName().replace('.', '\\'))
    sourceFile = packageFolder + "\\"
    sourceFileName = classObject.getSourceFile()
    if sourceFileName is not None:
        sourceFile += sourceFileName

    classParams = {"sourceFile": sourceFile, "name": classObject.name}
    classNode = createXmlElement("class", classParams, "".join(childNodes), 1, false, false)
    return classNode, classObject
def getMethodsinfo(path, attributeParser):
    '''Render every method of the class file at path as a <method> element.

    Each element carries the method's name, descriptor and the line number
    reported for bytecode position 0; its children are the method attributes,
    the parameter annotations and the calls found in the method body.

    path            : str - path to the .class file
    attributeParser : AttributeParser used to load and query the class file
    returns         : str - the concatenated <method> elements
    '''
    methodNodes = []
    for method in attributeParser._acquireClassFile(path).getMethods():
        methodParams = {
            "name": method.name,
            "descriptor": method.descriptor,
            "lineNumber": str(method.getLineNumber(0)),
        }
        innerParts = [
            getAttributeInfo(attributeParser.getMethodAttributes(path, method.name, method.descriptor)),
            getAnnotationInfo("parameterAnnotation",
                              attributeParser.getMethodParametersAttributes(path, method.name, method.descriptor)),
            getMethodsCalled(method),
        ]
        methodNodes.append(createXmlElement("method", methodParams, "".join(innerParts), 2, false, false))
    return "".join(methodNodes)
def canProcessFile(path):
    '''Tell whether the file at path can be loaded as a class file.

    path    : str - path to the candidate .class file
    returns : bool - True when AttributeParser._acquireClassFile() yields an
                     object, False when it yields None
    '''
    parsedClassFile = AttributeParser()._acquireClassFile(path)
    return parsedClassFile is not None
def createXmlFile(resultsFolder, path):
    '''Write <class name>.JavaAttributes.xml into resultsFolder for one class file.

    Nothing is written when the file at path cannot be loaded as a class file.

    resultsFolder : str - existing folder that receives the XML file
    path          : str - path to the .class file to describe
    returns       : None
    '''
    print("\n Creating XmlFile with Attribute info for: " + path)
    if not canProcessFile(path):
        return

    attributeParser = AttributeParser()
    classNode, classObject = getClassinfo(path, attributeParser)
    xmlContent = createXmlElement("JavaAttributeMappings", [], classNode, 0, false, false)

    # os.path.join instead of a hard-coded backslash keeps the target inside
    # resultsFolder on every platform.
    xmlFileName = os.path.join(resultsFolder, classObject.name + ".JavaAttributes.xml")
    xmlFile = open(xmlFileName, "w")
    try:
        xmlFile.write(xmlContent)
    finally:
        # The original referenced xmlFile.close without calling it, so the
        # file was never explicitly closed.
        xmlFile.close()
def createXmlFileForDir(resultsFolder, dirPath):
    '''Run createXmlFile() for every .class file found below dirPath.

    resultsFolder : str - existing folder that receives the XML files
    dirPath       : str - folder that is walked recursively
    returns       : None
    '''
    print("\n Creating XmlFile with Attribute info for directory : " + dirPath + "\n")
    for root, dirs, fileNames in os.walk(dirPath):
        for fileName in fileNames:
            if fileName.endswith(".class"):
                # os.path.join uses the platform's separator, matching the
                # paths os.walk itself produces.
                createXmlFile(resultsFolder, os.path.join(root, fileName))
'</begin of Dinis Changes>'
' Below is John\'s original code'
class AttributeParser(object):
    '''This class uses a bytecode parsing approach to implement query of
    class, method, method parameter, and field annotations on a
    path-to-classfile target.

    Dependencies include:
      jython 2.5.0 - http://bugs.jython.org (Python Interpreter written in
                     Java that has an embedded Java VM)
      javassist-3.11.0 - http://www.csg.is.titech.ac.jp/~chiba/javassist/ (
                     Java bytecode manipulation tool, that supports JRE 1.6)

    Execute the tool using the following:
      java -jar jython.jar -Dpython.path=./javassist.jar
      >>>import Annotations
      >>>ap = Annotations.AttributeParser()
      >>>help(ap)

    Every query re-reads the class file from disk and returns None (after
    printing a warning) when the file cannot be loaded or parsed.

    Author: John Steven (e-mail address obscured in the archived page)
    Created on: 07-10-09
    LastChanged: $LastChangedDate$
    Modified by: Dinis Cruz (e-mail address obscured in the archived page)
    Modified on: 11-10-09
    Where: places where the quality of the Python code dramatically drops :)
    '''

    # NOTE(review): the bare 'except:' clauses below are kept on purpose;
    # presumably they are what catches the Java exceptions thrown by
    # javassist under Jython - verify before narrowing them.

    def getClassAttributes(self, path_to_clazz):
        '''getClassAttributes() returns a list of annotations associated with the
        class definition. SourceFile and InnerClasses attributes are skipped.

        path_to_clazz : str - the absolute path to the class file to interrogate
        returns : [] - list of zero or more class annotations.
        returns : None - Error finding/loading/parsing class file data
        '''
        cf = self._acquireClassFile(path_to_clazz)
        if cf is None: return None
        attributes_to_return = list()
        try:
            for attribute in cf.getAttributes():
                if isinstance(attribute, SourceFileAttribute):
                    pass # Explicitly punt this attribute type
                elif isinstance(attribute, InnerClassesAttribute):
                    pass # Explicitly punt this attribute type
                else:
                    attributes_to_return.append(attribute)
            return attributes_to_return
        except:
            print('WARNING - %s: encountered %s %s getting attributes in %s' %
                  (self.__class__.__name__, sys.exc_info()[0],
                   sys.exc_info()[1], path_to_clazz))
            return None

    def getFieldAttributes(self, path_to_clazz, field_name):
        '''getFieldAttributes() return a list of annotations associated with a
        field within the class definition.

        path_to_clazz : str - the absolute path to the class file to interrogate
        field_name : str - the name of the class' field the query targets
        returns : [] - list holding the first attribute of every field whose
                       name equals field_name.
        returns : None - Error finding/loading/parsing class file data
        '''
        cf = self._acquireClassFile(path_to_clazz)
        if cf is None: return None
        try:
            # NOTE(review): only the first attribute of a matching field is
            # returned, and a matching field without attributes ends up in
            # the except branch - confirm that this is the intended contract.
            return [f.getAttributes()[0] for f in cf.getFields()
                    if str(f.getName()) == field_name]
        except:
            # self.__name__ does not exist on instances; the original warning
            # raised AttributeError instead of being printed.
            print('WARNING - %s: encountered %s %s getting %s\'s attributes for %s' %
                  (self.__class__.__name__, sys.exc_info()[0], sys.exc_info()[1],
                   field_name, path_to_clazz))
            return None

    def getMethodAttributes(self, path_to_clazz, method_name, signature=None):
        '''getMethodAttributes() return a list of annotations associated with (a)
        method(s) within the class definition.

        path_to_clazz : str - the absolute path to the class file to interrogate
        method_name : str - the name of the method the query targets
        signature : str - bytecode formatted method descriptor used to tell
                          overloaded methods apart; None matches by name only
        returns : [] - list of zero or more method annotations.
        returns : None - Error finding/loading/parsing class file data
        '''
        # Forward the caller's signature; the original always passed None, so
        # overloaded methods could not be told apart.
        return self.__getMethodAttributesImpl(path_to_clazz, method_name,
                                              parameters=False, signature=signature)

    def getMethodParametersAttributes(self, path_to_clazz, method_name,
                                      signature=None):
        '''getMethodParametersAttributes() return a list of annotations associated
        with (a) method's parameters.

        path_to_clazz : str - the absolute path to the class file to interrogate
        method_name : str - the name of the method the query targets
        signature : str - bytecode formatted method descriptor used to tell
                          overloaded methods apart; None matches by name only
        returns : [] - a list of the annotations associated with each parameter
                       within the method signature. Those not bearing
                       annotations will have a 'None' entry
        returns : None - Error finding/loading/parsing class file data
        '''
        # Forward the caller's signature (see getMethodAttributes).
        return self.__getMethodAttributesImpl(path_to_clazz, method_name,
                                              parameters=True, signature=signature)

    def __getMethodAttributesImpl(self, path_to_clazz, method_name,
                                  signature=None, parameters=False):
        '''Private getMethodAttributesImpl() - configures attribute-type matching
        function and then computes work of matching methods, and pulling
        annotations (associated with parameters) as appropriate.

        parameters : Boolean - True selects only the parameter annotation
                     attributes, False selects every other attribute except
                     the Code attribute
        '''
        def switchAnnotationType(interestInParams):
            if interestInParams:
                return lambda at: isinstance(at, ParameterAnnotationsAttribute)
            else:
                return lambda at: not isinstance(at, ParameterAnnotationsAttribute)

        cf = self._acquireClassFile(path_to_clazz)
        if cf is None: return None
        attributes_to_return = list()
        filterFn = switchAnnotationType(parameters)
        try:
            for method in cf.getMethods():
                if not self.__matchMethod(method_name, signature, method):
                    continue
                for attribute in method.getAttributes():
                    if not filterFn(attribute):
                        continue
                    if isinstance(attribute, CodeAttribute):
                        pass # Explicitly punt this attribute type
                    elif isinstance(attribute, ParameterAnnotationsAttribute):
                        # One entry per parameter: None when the parameter has
                        # no annotation, otherwise the last of its annotations.
                        for annotation in attribute.getAnnotations():
                            if len(annotation) == 0:
                                attributes_to_return.append(None)
                            else:
                                attributes_to_return.append(annotation.pop())
                    else:
                        attributes_to_return.append(attribute)
            return attributes_to_return
        except:
            print('WARNING - %s: encountered %s %s getting %s\'s attributes for %s' %
                  (self.__class__.__name__, sys.exc_info()[0], sys.exc_info()[1],
                   method_name, path_to_clazz))
            return None

    def __matchMethod(self, method_name, signature, method):
        '''Private matchMethod() - handles method matching by name (and if supplied
        by full Java bytecode method signature).

        method_name : str - the name of the method without parens, params, RV,
                      FQ class name, etc. 'foo' for 'foo()'
        signature : str - bytecode formatted method signature (IE:
                    '(Ljava/lang/String;)V'). In the case that name-only
                    matching is desired, the callers should pass 'None' for
                    this param.
        method : javassist.bytecode.Method object, the reversed Method object.
        returns : Boolean - True, the method matches, False otherwise.
        '''
        try:
            if method_name != method.getName():
                return False
            if signature is None:
                return True
            return signature == method.getDescriptor()
        except:
            print('WARNING - %s: Failure parsing methods %s %s'
                  % (self.__class__.__name__, sys.exc_info()[0], sys.exc_info()[1]))
            return False

    def _acquireClassFile(self, path_to_clazz):
        '''semi-private acquireClassFile() - method designed as helper function to
        getXXXAttribute() functions, but also serves useful in unit testing b/c
        this method returns the javassist.bytecode.ClassFile object which
        represents the fully parsed, reversed class file.

        path_to_clazz : str - the absolute path to the class file to interrogate
        returns : javassist.bytecode.ClassFile - Reversed Java Class file
        returns : None - the file could not be opened or parsed (an error
                  message is printed)
        '''
        dis = None
        try:
            dis = DataInputStream(BufferedInputStream(FileInputStream(path_to_clazz)))
            return ClassFile(dis)
        except:
            print('ERROR - encountered %s %s getting %s' %
                  (sys.exc_info()[0], sys.exc_info()[1], path_to_clazz))
            return None
        finally:
            # The stream was never closed before, leaking one file handle per
            # call (and this method is called several times per class file).
            if dis is not None:
                try:
                    dis.close()
                except:
                    pass # a failed close of a read-only stream is harmless here
# DC Code
# createXmlFile(path)
#
# Command-line entry point: argv[1] is the results folder, argv[2] is either
# a single .class file or a folder that is searched for .class files.
errormessage = " The first argument must be the results folder and the 2nd argument must either be the target class or the target folder\n\n"
if len(sys.argv) != 3:
    print("\n\n Error: You need to provide two parameters." + errormessage)
else:
    resultsFolder, targetClassFileOrFolder = sys.argv[1], sys.argv[2]
    print("resultsFolder: " + resultsFolder)
    print("targetClassFileOrFolder: " + targetClassFileOrFolder)
    targetIsFolder = os.path.isdir(targetClassFileOrFolder)
    argumentsAreValid = os.path.isdir(resultsFolder) and (
        targetIsFolder or os.path.isfile(targetClassFileOrFolder))
    if not argumentsAreValid:
        print("\n\n Error with the arguments supplied:" + errormessage)
    elif targetIsFolder:
        createXmlFileForDir(resultsFolder, targetClassFileOrFolder)
    elif targetClassFileOrFolder.endswith(".class"):
        createXmlFile(resultsFolder, targetClassFileOrFolder)
    else:
        print("\n\n File provided must be of extension .class:")
#targetFile = "E:\\O2\\_Bin_(O2_Binaries)\\O2_Cmd_SpringMvc\\_UnitTests\\TestFiles\\EditOwnerForm.class"
#createXmlFile("C:\\O2", targetFile)