#!/usr/bin/env python
"""In this module we extract complexity measures from Java ASTs."""
import logging
import copy
import re
import javalang
# Node type names (javalang) that add one increment for Sonar cognitive complexity.
JAVA_BRANCH_TYPES = [
    'IfStatement', 'WhileStatement', 'DoStatement', 'ForStatement',
    'SwitchStatement',  # 'SwitchStatementCase',
    'TernaryExpression'  # , 'TryStatement', cognitive_complexity_sonar does not increment nesting for TryStatement
]
# Node types that increase the nesting level (branch types plus catch clauses).
JAVA_NESTING_TYPES = JAVA_BRANCH_TYPES + ['CatchClause']
# Node types counted for the (approximated) McCabe cyclomatic complexity.
JAVA_BRANCH_TYPES_MCCC = [
    'IfStatement', 'WhileStatement', 'DoStatement', 'ForStatement',
    'SwitchStatementCase',
    'TernaryExpression'  # , 'TryStatement', cognitive_complexity_sonar does not increment nesting for TryStatement
]
# Short-circuit operators that form boolean operator sequences.
JAVA_SEQUENCE_TYPES = ['&&', '||']
# JVM type descriptor codes -> Java type names (Sourcemeter notation).
SM_DT = {'I': 'int', 'J': 'long', 'V': 'Void', 'Z': 'boolean', 'F': 'float', 'C': 'char', 'B': 'byte', 'D': 'double'}
# Inverse mapping, keyed by lower-cased type name.
SM_DT_INV = {v.lower(): k for k, v in SM_DT.items()}


class SourcemeterConversion(object):
    """This is a utility class for matching against Sourcemeter long_names."""

    def __init__(self):
        # Matches the '(parameter codes)return code' part of a Sourcemeter long_name.
        # Raw string avoids invalid escape-sequence warnings on modern Python
        # (pattern value is unchanged, none of the escapes were interpreted).
        self._param = re.compile(r'\(([\w,;,/,\$,\[]*)\)([\w,;,,\./,\$,\[]+)')
        self._log = logging.getLogger('coastSHARK')

    def _remove_bracket(self, dt):
        """Strip leading '[' array markers from *dt*, return (stripped, count)."""
        count = 0
        while dt.startswith('['):
            dt = dt[1:]
            count += 1
        return dt, count

    def get_sm_long_name2(self, long_name):
        """Return a collapsed Sourcemeter long_name.

        Fully qualified object type names in the parameter/return codes are
        collapsed to their last component, e.g. Ljava/lang/String; -> LString;.

        :param long_name: a Sourcemeter long_name string
        :raises Exception: if *long_name* does not contain a (params)return part
        """
        self._long_name = long_name
        m = re.search(self._param, long_name)
        if not m:
            raise Exception('no match found for {}'.format(long_name))
        ret, bc = self._remove_bracket(m.group(2))
        # NOTE(review): for primitive (non-'L') array returns the '[' markers are
        # dropped here, '[' * bc is only re-attached in the 'L' branch -- confirm intended.
        if ret.startswith('L'):
            pruned = self._match_l(ret[1:-1])
            if pruned.lower() in SM_DT_INV.keys():
                ret = '[' * bc + SM_DT_INV[pruned.lower()]
            else:
                ret = '[' * bc + self._match_l('L{};'.format(pruned))
        plist = []
        rest_param = m.group(1)
        while len(rest_param) > 0 and rest_param[0] != ';':
            rest_param, bc = self._remove_bracket(rest_param)
            if rest_param.startswith('L'):
                tmp = rest_param.split(';')
                # reattach the rest
                if len(tmp) > 1:
                    rest_param = ';'.join(tmp[1:]) + ';'  # need to append ; again
                else:
                    rest_param = ''
                # drop L, ; is already dropped
                pruned = self._match_l(tmp[0][1:])
                if pruned.lower() in SM_DT_INV.keys():
                    r = SM_DT_INV[pruned.lower()]
                    plist.append('[' * bc + r)
                else:
                    if pruned:
                        plist.append('[' * bc + 'L{};'.format(pruned))
                    else:
                        plist.append('L')
            else:
                # primitive descriptor code, keep one character
                plist.append('[' * bc + rest_param[0])
                rest_param = rest_param[1:]
        start = long_name.split('(')
        return '{}({}){}'.format(start[0], ''.join(plist), ret)

    def get_sm_long_name(self, method):
        """Return a collapsed Sourcemeter long_name from our own method data.

        :param method: dict with package_name, class_name, method_name,
            parameter_types and return_type (as produced by ComplexityJava)
        :return: tuple of two variants; the second maps 'J' (long) parameters to 'L'
        """
        ps = []
        for param in method['parameter_types']:
            pruned, bc = self._remove_bracket(param)
            if pruned.lower() in SM_DT_INV.keys():
                ps.append('[' * bc + SM_DT_INV[pruned.lower()])
            else:
                ps.append('[' * bc + 'L{};'.format(pruned))
        if method['return_type'] == 'Void':
            ret = 'V'
        else:
            pruned, bc = self._remove_bracket(method['return_type'])
            if pruned.lower() in SM_DT_INV:
                ret = '[' * bc + SM_DT_INV[pruned.lower()]
            else:
                ret = '[' * bc + 'L{};'.format(pruned)
        params1 = ''.join(ps)
        params2 = ''
        for p in ps:
            # second variant maps long ('J') to 'L'
            if p.lower() == 'j':
                params2 += 'L'
            else:
                params2 += p
        var1 = '{}.{}.{}({}){}'.format(method['package_name'], method['class_name'], method['method_name'], params1, ret)
        var2 = '{}.{}.{}({}){}'.format(method['package_name'], method['class_name'], method['method_name'], params2, ret)
        return var1, var2

    def get_sm_params(self, long_name):
        """Take a Sourcemeter long_name for a method and return a Mapping to our method parameter types and return types.

        This is used to match overloaded functions.

        :raises Exception: if *long_name* does not contain a (params)return part
        """
        self._long_name = long_name
        m = re.search(self._param, long_name)
        if not m:
            raise Exception('no match found for {}'.format(long_name))
        params = self._match_params(m.group(1))
        ret = self._match_return(m.group(2))
        return params, ret

    def _match_params(self, param):
        """Translate the parameter descriptor codes to a list of type names."""
        plist = []
        rest_param = param
        while len(rest_param) > 0 and rest_param[0] != ';':
            # NOTE(review): the bracket count bc is discarded here, so array
            # parameters collapse to their base type -- confirm intended for
            # overload matching.
            rest_param, bc = self._remove_bracket(rest_param)
            if rest_param.startswith('L'):
                tmp = rest_param.split(';')
                # reattach the rest
                if len(tmp) > 1:
                    rest_param = ';'.join(tmp[1:]) + ';'  # need to append ; again
                else:
                    rest_param = ''
                # drop L, ; is already dropped
                plist.append(self._match_l(tmp[0][1:]))
            else:
                plist.append(self._match_dt(rest_param[0]))
                rest_param = rest_param[1:]
        return plist

    def _match_l_inner(self, dt, match):
        """Collapse a qualified name to its last *match*-separated component, keeping leading 'L's."""
        num_ls = ''
        c = dt.split(match)[0]
        while c.startswith('L'):
            num_ls += 'L'
            c = c[1:]
        return '{}{}'.format(num_ls, dt.split(match)[-1])

    def _match_l(self, dt):
        """Collapse a fully qualified object type name ($, . or / separated) to its last part."""
        if '$' in dt:
            return self._match_l_inner(dt, '$')
        elif '.' in dt:
            return self._match_l_inner(dt, '.')
        elif '/' in dt:
            return self._match_l_inner(dt, '/')
        else:
            return dt

    def _match_dt(self, dt):
        """Translate a primitive descriptor code to its Java type name."""
        dt, bc = self._remove_bracket(dt)
        try:
            return '[' * bc + SM_DT[dt]
        except KeyError:
            # bugfix: previously this logged the error but then hit an
            # UnboundLocalError on return; log and return the raw code instead.
            self._log.error('[SM MATCH] no such dt key: {} for long_name: {}'.format(dt, self._long_name))
            return '[' * bc + dt

    def _match_return(self, ret):
        """Translate the return descriptor code to its type name."""
        ret, bc = self._remove_bracket(ret)
        if not ret.startswith('L'):
            return '[' * bc + self._match_dt(ret)
        else:
            # get everything between L and ; and return last part
            return '[' * bc + self._match_l(ret[1:-1])
class ComplexityJava(object):
    """Extracts complexity measures from the AST."""

    def __init__(self, compilation_unit_ast):
        """Extract complexity measures from the compilation units AST.

        :param compilation_unit_ast: The AST of the compilation unit (.java file).
        """
        self.ast = compilation_unit_ast
        self._log = logging.getLogger('coastSHARK')
        # used to match operators for sequence changes; raw string avoids
        # invalid escape-sequence warnings (pattern value unchanged)
        self._binop = re.compile(r'binop:"(!?\W+)"')

    def _method_params(self, method):
        """Extract method parameter and return types, we already do some Sourcemeter notations here, e.g., [ for array."""
        ret_type = 'Void'
        if hasattr(method, 'return_type') and method.return_type:
            r = method.return_type
            # array dimensions are noted as '[' before following the sub_type chain
            dim = ''
            if hasattr(r, 'dimensions') and r.dimensions:
                dim = '[' * len(r.dimensions)
            # follow the sub_type chain to get the innermost type name
            while hasattr(r, 'sub_type') and r.sub_type:
                r = r.sub_type
            ret_type = '{}{}'.format(dim, r.name)
        params = []
        for param in method.parameters:
            t = param.type
            # we count dimensions which gives us the array information, e.g., int[] or int[][], we also add this in sourcemeter notation
            dim = ''
            if hasattr(t, 'dimensions') and t.dimensions:
                dim = '[' * len(t.dimensions)
            # method(String... args) counts as an array parameter
            if param.varargs:
                dim = '['
            # we follow the chain because if we would not do that we would return java instead of ResultSet for java.sql.ResultSet
            while hasattr(t, 'sub_type') and t.sub_type:
                t = t.sub_type
            name = '{}{}'.format(dim, t.name)
            params.append(name)
        return params, ret_type

    def _find_max_level(self):
        """Find maximal level for counting anonymous classes.

        A normal ClassDeclaration counts as one level, a ClassCreator counts only if it has inline methods defined.
        """
        max_level = 0
        for path, method in self.ast:
            if type(method).__name__ != 'MethodDeclaration':
                continue
            level = 0
            for i, p in enumerate(path):
                if type(p).__name__ == 'ClassDeclaration':
                    level += 1
                if type(p).__name__ == 'ClassCreator':
                    # anonymous classes only count if they define inline methods
                    if 'MethodDeclaration' in [type(n).__name__ for n in path[i + 1]]:
                        level += 1
            if level > max_level:
                max_level = level
        return max_level + 1

    def _class_name(self, path):
        """Return class name in Sourcemeter notation ($-separated nesting)."""
        names = []
        for i, n in enumerate(path):
            if type(n).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration']:
                # in this case we have a named class inside a method we prepend the counter ala $1NamedClass
                if type(path[i - 2]).__name__ == 'MethodDeclaration':
                    names.append(str(self._count_map[n.position[0]][1]) + n.name)
                # normal named class, just append the name
                else:
                    names.append(n.name)
            elif type(n).__name__ == 'ClassCreator' and self._has_immediate_method(n):
                # anonymous class with methods gets only its running number
                names.append(str(self._count_map[n.position[0]][0]))
        class_name = '$'.join(names)
        return class_name

    def _has_immediate_method(self, node):
        """Check if the node directly contains a MethodDeclaration (path length 2)."""
        for node_path, n1 in node:
            if type(n1).__name__ == 'MethodDeclaration' and len(node_path) == 2:
                return True
        return False

    def _parent_pos(self, path):
        """Return parent position (source line) for our relevant node types."""
        first = None
        for n in reversed(path):
            if type(n).__name__ not in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration', 'ClassCreator']:
                continue
            if type(n).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration']:
                first = n
                break
            if type(n).__name__ == 'ClassCreator' and self._has_immediate_method(n):
                first = n
                break
        # NOTE(review): if no enclosing class-like node exists this raises
        # AttributeError on None -- presumably callers guarantee one exists.
        return first.position[0]

    def _parse_level_positions(self, ast, max_level=10):
        """Count positions and levels for our relevant node types."""
        for current_level in range(1, max_level):
            for path, node in ast:
                # these do not count towards levels
                if type(node).__name__ not in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration', 'ClassCreator']:
                    continue
                # these count towards the levels
                level = 0
                for i, n in enumerate(path):
                    if type(n).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration']:
                        level += 1
                    if type(n).__name__ == 'ClassCreator' and self._has_immediate_method(n):
                        level += 1
                # evaluate the current level
                if level == current_level:
                    # we do only count classCreator nodes with inline methods
                    if type(node).__name__ == 'ClassCreator' and not self._has_immediate_method(node):
                        continue
                    parent_pos = self._parent_pos(path)
                    line = node.position[0]
                    # sadly, we need two lists, one for ClassCreators with methods and another for named classes that are defined inside methods
                    if parent_pos not in self._level_map.keys():
                        self._level_map[parent_pos] = 0
                    if parent_pos not in self._level_map2.keys():
                        self._level_map2[parent_pos] = 0
                    # only count pos for inline classes in methods
                    # NOTE(review): 'i' here is the index left over from the
                    # enumerate loop above (last path index), so path[i - 1] is
                    # the second-to-last path element -- confirm this is intended.
                    if type(node).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration'] and type(path[i - 1]).__name__ == 'MethodDeclaration':
                        self._level_map2[parent_pos] += 1
                    # or inline class creators with methods
                    if type(node).__name__ == 'ClassCreator' and self._has_immediate_method(node):
                        self._level_map[parent_pos] += 1
                    pos1 = self._level_map[parent_pos]
                    pos2 = self._level_map2[parent_pos]
                    self._count_map[line] = (pos1, pos2)

    def cognitive_complexity(self):
        """Extract complexity metrics for all methods of the current file.

        Yields one dict per method/constructor containing identification data
        (package, class, method name, parameter and return types) and the
        computed metrics.
        """
        self._level_map = {}
        self._level_map2 = {}
        self._count_map = {}
        self._parse_level_positions(self.ast, self._find_max_level())
        package = None
        for path, node in self.ast.filter(javalang.tree.PackageDeclaration):
            package = node.name
        for path, method in self.ast:
            # we only are interested in methods and constructors
            if type(method).__name__ not in ['MethodDeclaration', 'ConstructorDeclaration']:
                continue
            full_name = self._class_name(path)
            method_name = method.name
            # bugfix: was a substring test ('in'), equality is what is meant here
            if type(method).__name__ == 'ConstructorDeclaration':
                method_name = '<init>'  # sourcemeter notation
            self._log.debug('evaluating package {}, class {}, method {}'.format(package, full_name, method_name))
            # gather metrics from the metric ast
            cogcs = self.cognitive_complexity_sonar(method)
            cc = self.cyclomatic_complexity(method)
            params, ret_type = self._method_params(method)
            # we may have interface methods with a body (default methods); only
            # methods without a body are flagged as interface methods
            im = method.body is None
            yield {'package_name': package, 'class_name': full_name, 'method_name': method_name, 'return_type': ret_type, 'parameter_types': params, 'cognitive_complexity_sonar': cogcs, 'cyclomatic_complexity': cc, 'is_interface_method': im}

    def cognitive_complexity_sonar(self, method):
        """Extract cognitive complexity.

        Description here: https://www.sonarsource.com/docs/CognitiveComplexity.pdf
        """
        cogcs = 0
        cogcs += self._nesting(method)
        cogcs += self._binop_cc(method)
        cogcs += self._recursion(method)
        return cogcs

    def cyclomatic_complexity(self, method):
        """Extract cyclomatic complexity (not really, we just count branch types)."""
        cc = 1
        for c, n in method:
            if type(n).__name__ in JAVA_BRANCH_TYPES_MCCC:
                cc += 1
        return cc

    def _sequence_key(self, path):
        """Return the source line of the statement containing a BinaryOperation sequence."""
        # 1. remove BinaryOperation from path until parent object is reached
        # (read-only truncation, a shallow copy suffices; was copy.deepcopy)
        basepath = list(path)
        while type(basepath[-1]).__name__ in ['BinaryOperation', 'list']:
            basepath = basepath[:-1]
            if type(basepath[-1]).__name__ == 'BinaryOperation' and basepath[-1].operator not in JAVA_SEQUENCE_TYPES:
                basepath = basepath[:-1]
                continue
        # 2. use the position of the parent of the BinaryOperation, e.g., if, while as key
        if not basepath[-1].position:
            raise Exception('no position for: {}'.format(basepath[-1]))
        return basepath[-1].position[0]

    def _recursion(self, method):
        """Detect recursion in method.

        This has drawbacks! We will count recursion if we call a method of the same name as the current but with different attributes.
        We do not have the ability to discern attribute types only names and number of attributes.
        """
        cc = 0
        for _, n in method:
            if type(n).__name__ == 'MethodInvocation':
                if n.member == method.name:
                    cc += 1
        return cc

    def _binop_check(self, binop):
        """Return True for BinaryOperation nodes with a short-circuit operator."""
        return type(binop).__name__ == 'BinaryOperation' and binop.operator in JAVA_SEQUENCE_TYPES

    def _binop_cc(self, method):
        """Here we look at sequences of binary operations.

        To group them together we first find the position of one binary sequences containing element (if, etc.)
        As we are traversing the tree our first BinaryOperation for a certain containing element will be the middle of all BinaryStatements on the line.
        From there on we traverse the tree and get the sequence in prefix notation.
        We then change to infix notation so that we can count the changes in operators that the human would see and count that.
        """
        tmp = {}
        sequences = {}
        for p, n in method:
            # this should be the first binop per key
            if self._binop_check(n):
                key = self._sequence_key(p)
                if key not in tmp.keys():
                    tmp[key] = n
        # this appends the nodes to the list in prefix notation
        for k, v in tmp.items():
            sequences[k] = []
            for p, n in v:
                sequences[k].append(n)
        # create infix from prefix notation
        nseq = {}
        for k, v in sequences.items():
            stack = []
            for op in reversed(v):
                if self._binop_check(op):
                    p1 = stack.pop()
                    p2 = stack.pop()
                    p = ''
                    if hasattr(op, 'prefix_operators') and op.prefix_operators:
                        p = '!'
                    stack.append('({} binop:"{}{}" {})'.format(p1, p, op.operator, p2))
                else:
                    stack.append(op)
            nseq[k] = stack.pop()
        # get the human readable sequence of operators and prefix operator (!)
        for k, v in nseq.items():
            ops = []
            for m in re.findall(self._binop, v):
                if m:
                    ops.append(m)
            sequences[k] = ops
            self._log.debug('[seq] found sequence {} at pos {}'.format(ops, k))
        # count the changes in sequences via the operators, e.g., a && b || c
        full_cc = 0
        for k, v in sequences.items():
            cc = 1
            op = v[0]
            for binop in v[1:]:
                prefix = None
                if len(binop) > 2:  # we have a prefix (!)
                    prefix = '!'
                    binop = binop[1:]
                if binop != op:
                    cc += 1
                elif prefix:  # binop == op but binop has prefix
                    cc += 1
                op = binop
            full_cc += cc
            self._log.debug('[seq] cc for sequence {} at {} is {}'.format(v, k, cc))
        return full_cc

    def _nesting(self, method):
        """Sum the nesting-aware increments for all branch nodes in *method*."""
        cc = 0
        for path, n in method:
            nest = 0  # MethodDeclaration does not count so we can do this here
            for p in path:
                if type(p).__name__ in JAVA_NESTING_TYPES:
                    nest += 1
                    self._log.debug('[nest] {} in {} increase nesting level to {}'.format(p, 'JAVA_NESTING_TYPES', nest))
                # NOTE(review): 'type(p) == []' compares a type object to an empty
                # list and is always False, so this branch is dead code -- probably
                # 'isinstance(p, list)' was intended; left unchanged to keep the
                # reported metric values stable.
                if type(p) == [] and type(p[0]).__name__ in JAVA_NESTING_TYPES:
                    nest += 1
                    self._log.debug('[nest] {} in {} increase nesting level to {}'.format(p[0], 'JAVA_NESTING_TYPES', nest))
            nesting = nest
            if type(n).__name__ in JAVA_BRANCH_TYPES + ['TryStatement']:
                cc += 1 + nesting
                self._log.debug('[nest] {} in {} increase cc to {} including nesting {}'.format(n, 'JAVA_BRANCH_TYPES and Try', cc, nesting))
        return cc