Source code for util.complexity_java

#!/usr/bin/env python

"""In this module we extract complexity measures from Java ASTs."""

import logging
import copy
import re

import javalang


JAVA_BRANCH_TYPES = [
    'IfStatement', 'WhileStatement', 'DoStatement', 'ForStatement',
    'SwitchStatement',  # 'SwitchStatementCase',
    'TernaryExpression'  # , 'TryStatement',  cognitive_complexity_sonar does not increment nesting for TryStatement
]

JAVA_NESTING_TYPES = JAVA_BRANCH_TYPES + ['CatchClause']

JAVA_BRANCH_TYPES_MCCC = [
    'IfStatement', 'WhileStatement', 'DoStatement', 'ForStatement',
    'SwitchStatementCase',
    'TernaryExpression'  # , 'TryStatement',  cognitive_complexity_sonar does not increment nesting for TryStatement
]

JAVA_SEQUENCE_TYPES = ['&&', '||']

SM_DT = {'I': 'int', 'J': 'long', 'V': 'Void', 'Z': 'boolean', 'F': 'float', 'C': 'char', 'B': 'byte', 'D': 'double'}

SM_DT_INV = {v.lower(): k for k, v in SM_DT.items()}


[docs]class SourcemeterConversion(object): """This is a utility class for matching against Sourcemeter long_names.""" def __init__(self): self._param = re.compile('\(([\w,;,/,\$,\[]*)\)([\w,;,,\./,\$,\[]+)') # used to map sourcemeter params and return code self._log = logging.getLogger('coastSHARK') def _remove_bracket(self, dt): count = 0 while dt.startswith('['): dt = dt[1:] count += 1 return dt, count
[docs] def get_sm_long_name2(self, long_name): """Return a collapsed Sourcemeter long_name.""" self._long_name = long_name m = re.search(self._param, long_name) if not m: raise Exception('no match found for {}'.format(long_name)) ret, bc = self._remove_bracket(m.group(2)) if ret.startswith('L'): pruned = self._match_l(ret[1:-1]) if pruned.lower() in SM_DT_INV.keys(): ret = '[' * bc + SM_DT_INV[pruned.lower()] else: ret = '[' * bc + self._match_l('L{};'.format(pruned)) plist = [] rest_param = m.group(1) while len(rest_param) > 0 and rest_param[0] != ';': rest_param, bc = self._remove_bracket(rest_param) if rest_param.startswith('L'): tmp = rest_param.split(';') # reattach the rest if len(tmp) > 1: rest_param = ';'.join(tmp[1:]) + ';' # need to append ; again else: rest_param = '' # drop L, ; is already dropped pruned = self._match_l(tmp[0][1:]) if pruned.lower() in SM_DT_INV.keys(): r = SM_DT_INV[pruned.lower()] plist.append('[' * bc + r) else: if pruned: plist.append('[' * bc + 'L{};'.format(pruned)) else: plist.append('L') else: plist.append('[' * bc + rest_param[0]) rest_param = rest_param[1:] start = long_name.split('(') return '{}({}){}'.format(start[0], ''.join(plist), ret)
[docs] def get_sm_long_name(self, method): """Return a collapsed Sourcemeter long_name from our own method data.""" ps = [] for param in method['parameter_types']: pruned, bc = self._remove_bracket(param) if pruned.lower() in SM_DT_INV.keys(): ps.append('[' * bc + SM_DT_INV[pruned.lower()]) else: ps.append('[' * bc + 'L{};'.format(pruned)) if method['return_type'] == 'Void': ret = 'V' else: pruned, bc = self._remove_bracket(method['return_type']) if pruned.lower() in SM_DT_INV: ret = '[' * bc + SM_DT_INV[pruned.lower()] else: ret = '[' * bc + 'L{};'.format(pruned) params1 = ''.join(ps) params2 = '' for p in ps: if p.lower() == 'j': params2 += 'L' else: params2 += p var1 = '{}.{}.{}({}){}'.format(method['package_name'], method['class_name'], method['method_name'], params1, ret) var2 = '{}.{}.{}({}){}'.format(method['package_name'], method['class_name'], method['method_name'], params2, ret) return var1, var2
[docs] def get_sm_params(self, long_name): """Take a Sourcemeter long_name for a method and return a Mapping to our method parameter types and return types. This is used to match overloaded functions. """ self._long_name = long_name m = re.search(self._param, long_name) if not m: raise Exception('no match found for {}'.format(long_name)) params = self._match_params(m.group(1)) ret = self._match_return(m.group(2)) return params, ret
def _match_params(self, param): plist = [] rest_param = param while len(rest_param) > 0 and rest_param[0] != ';': rest_param, bc = self._remove_bracket(rest_param) if rest_param.startswith('L'): tmp = rest_param.split(';') # reattach the rest if len(tmp) > 1: rest_param = ';'.join(tmp[1:]) + ';' # need to append ; again else: rest_param = '' # drop L, ; is already dropped plist.append(self._match_l(tmp[0][1:])) else: plist.append(self._match_dt(rest_param[0])) rest_param = rest_param[1:] return plist def _match_l_inner(self, dt, match): num_ls = '' c = dt.split(match)[0] while c.startswith('L'): num_ls += 'L' c = c[1:] return '{}{}'.format(num_ls, dt.split(match)[-1]) def _match_l(self, dt): if '$' in dt: return self._match_l_inner(dt, '$') elif '.' in dt: return self._match_l_inner(dt, '.') elif '/' in dt: return self._match_l_inner(dt, '/') else: return dt def _match_dt(self, dt): dt, bc = self._remove_bracket(dt) try: r = '[' * bc + SM_DT[dt] except KeyError as e: self._log.error('[SM MATCH] no such dt key: {} for long_name: {}'.format(dt, self._long_name)) return r def _match_return(self, ret): ret, bc = self._remove_bracket(ret) if not ret.startswith('L'): return '[' * bc + self._match_dt(ret) else: # get everything between L and ; and return last part return '[' * bc + self._match_l(ret[1:-1])
[docs]class ComplexityJava(object): """Extracts complexity measures from the AST.""" def __init__(self, compilation_unit_ast): """Extract complexity measures from the compilation units AST. :param compilation_unit_ast: The AST of the compilation unit (.java file). """ self.ast = compilation_unit_ast self._log = logging.getLogger('coastSHARK') self._binop = re.compile('binop:"(!?\W+)"') # used to match operators for sequence changes def _method_params(self, method): """Extract method parameter and return types, we already do some Sourcmeter notations here, e.g., [ for array.""" ret_type = 'Void' if hasattr(method, 'return_type') and method.return_type: r = method.return_type # see below dim = '' if hasattr(r, 'dimensions') and r.dimensions: dim = '[' * len(r.dimensions) # see below while hasattr(r, 'sub_type') and r.sub_type: r = r.sub_type ret_type = '{}{}'.format(dim, r.name) params = [] for param in method.parameters: t = param.type # we count dimensions which gives us the array informaiton, e.g., int[] or int[][], we also add this in sourcemeter notation dim = '' if hasattr(t, 'dimensions') and t.dimensions: dim = '[' * len(t.dimensions) # method(String... args) if param.varargs: dim = '[' # we follow the chain because if we would not do that we would return java instead of ResultSet for java.sql.ResultSet while hasattr(t, 'sub_type') and t.sub_type: t = t.sub_type name = '{}{}'.format(dim, t.name) params.append(name) return params, ret_type def _find_max_level(self): """Find maximal level for Counting anonymous Classes. A normal ClassDeclaration counts as one level, a ClassCreator counts only if it has inline methods defined. """ max_level = 0 for path, method in self.ast: if type(method).__name__ != 'MethodDeclaration': continue level = 0 for i, p in enumerate(path): if type(p).__name__ == 'ClassDeclaration': level += 1 if type(p).__name__ == 'ClassCreator': if 'MethodDeclaration' in [type(n).__name__ for n in path[i + 1]]: level += 1 if level > max_level: max_level = level return max_level + 1 def _class_name(self, path): """Return class name in Sourcemeter notation.""" names = [] for i, n in enumerate(path): class_name = '$'.join(names) if type(n).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration']: # in this case we have a named class inside a method we prepend the counter ala $1NamedClass if type(path[i - 2]).__name__ == 'MethodDeclaration': names.append(str(self._count_map[n.position[0]][1]) + n.name) # normal named class, just append the name else: names.append(n.name) elif type(n).__name__ == 'ClassCreator' and self._has_immediate_method(n): names.append(str(self._count_map[n.position[0]][0])) class_name = '$'.join(names) return class_name def _has_immediate_method(self, node): """Check if the node has a method.""" for _, n1 in node: if type(n1).__name__ == 'MethodDeclaration' and len(_) == 2: return True return False def _parent_pos(self, path): """Return parent position for our relevant node types.""" first = None for n in reversed(path): if type(n).__name__ not in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration', 'ClassCreator']: continue if type(n).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration']: first = n break if type(n).__name__ == 'ClassCreator' and self._has_immediate_method(n): first = n break return first.position[0] def _parse_level_positions(self, ast, max_level=10): """Count positions and levels for our relevant node types.""" for current_level in range(1, max_level): for path, node in ast: # these do not count towards levels if type(node).__name__ not in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration', 'ClassCreator']: continue # these count towards the levels level = 0 for i, n in enumerate(path): if type(n).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration']: level += 1 if type(n).__name__ == 'ClassCreator' and self._has_immediate_method(n): level += 1 # evaluate the current level if level == current_level: # we do only count classCreator nodes with inline methods if type(node).__name__ == 'ClassCreator' and not self._has_immediate_method(node): continue parent_pos = self._parent_pos(path) line = node.position[0] # sadly, we need two lists, one for ClassCreators with methods and another for named classes that are defined inside methods if parent_pos not in self._level_map.keys(): self._level_map[parent_pos] = 0 if parent_pos not in self._level_map2.keys(): self._level_map2[parent_pos] = 0 # only count pos for inline classes in methods if type(node).__name__ in ['ClassDeclaration', 'InterfaceDeclaration', 'EnumDeclaration'] and type(path[i - 1]).__name__ == 'MethodDeclaration': self._level_map2[parent_pos] += 1 # or inline class creators with methods if type(node).__name__ == 'ClassCreator' and self._has_immediate_method(node): self._level_map[parent_pos] += 1 pos1 = self._level_map[parent_pos] pos2 = self._level_map2[parent_pos] self._count_map[line] = (pos1, pos2)
[docs] def cognitive_complexity(self): """Extract complexity metrics for all methods of the current file.""" self._level_map = {} self._level_map2 = {} self._count_map = {} self._parse_level_positions(self.ast, self._find_max_level()) package = None for path, node in self.ast.filter(javalang.tree.PackageDeclaration): package = node.name for path, method in self.ast: # we only are interested in methods and constructors if type(method).__name__ not in ['MethodDeclaration', 'ConstructorDeclaration']: continue full_name = self._class_name(path) method_name = method.name if type(method).__name__ in 'ConstructorDeclaration': method_name = '<init>' # sourcemeter notation self._log.debug('evaluating package {}, class {}, method {}'.format(package, full_name, method_name)) # gather metrics from the metric ast cogcs = self.cognitive_complexity_sonar(method) cc = self.cyclomatic_complexity(method) params, ret_type = self._method_params(method) # we may have interface methods with a body (default methods) im = method.body is None yield {'package_name': package, 'class_name': full_name, 'method_name': method_name, 'return_type': ret_type, 'parameter_types': params, 'cognitive_complexity_sonar': cogcs, 'cyclomatic_complexity': cc, 'is_interface_method': im}
[docs] def cognitive_complexity_sonar(self, method): """Extract cognitive complexity. Description here: https://www.sonarsource.com/docs/CognitiveComplexity.pdf """ cogcs = 0 cogcs += self._nesting(method) cogcs += self._binop_cc(method) cogcs += self._recursion(method) return cogcs
[docs] def cyclomatic_complexity(self, method): """Extract cyclomatic complexity (not really, we just count branch types).""" cc = 1 for c, n in method: if type(n).__name__ in JAVA_BRANCH_TYPES_MCCC: cc += 1 return cc
def _sequence_key(self, path): # 1. remove BinaryOperator from path until parent object is reached basepath = copy.deepcopy(path) while type(basepath[-1]).__name__ in ['BinaryOperation', 'list']: basepath = basepath[:-1] if type(basepath[-1]).__name__ == 'BinaryOperation' and basepath[-1].operator not in JAVA_SEQUENCE_TYPES: basepath = basepath[:-1] continue # 2. use the position of the parent of the BinaryOperation, e.g., if, while as key if not basepath[-1].position: raise Exception('no position for: {}'.format(basepath[-1])) return basepath[-1].position[0] def _recursion(self, method): """ Detect recursion in method. This has drawbacks! We will count recursion if we call a method of the same name as the current but with different attributes. We do not have the ability to discern attribute types only names and number of attributes. """ cc = 0 for p, n in method: if type(n).__name__ == 'MethodInvocation': if n.member == method.name: cc += 1 return cc def _binop_check(self, binop): return type(binop).__name__ == 'BinaryOperation' and binop.operator in JAVA_SEQUENCE_TYPES def _binop_cc(self, method): """Here we look at sequences of binary operations. To group them together we first find the position of one binary sequences containing element (if, etc.) As we are traversing the tree our first BinaryOperation for a certain containing element will be the middle of all BinaryStatements on the line. From there on we traverse the tree and get the sequence in prefix notation. We then change to infix notation so that we can count the changes in operators that the human would see and count that. """ tmp = {} sequences = {} for p, n in method: # this hould be the first binop per key if self._binop_check(n): key = self._sequence_key(p) if key not in tmp.keys(): tmp[key] = n # this appends the nodes to the list in prefix notation for k, v in tmp.items(): sequences[k] = [] for p, n in v: sequences[k].append(n) # create infix from prefix notation nseq = {} for k, v in sequences.items(): stack = [] for op in reversed(v): if self._binop_check(op): p1 = stack.pop() p2 = stack.pop() p = '' if hasattr(op, 'prefix_operators') and op.prefix_operators: p = '!' stack.append('({} binop:"{}{}" {})'.format(p1, p, op.operator, p2)) else: stack.append(op) nseq[k] = stack.pop() # get the human readable sequence of operators and prefix operator (!) for k, v in nseq.items(): tmp = [] for m in re.findall(self._binop, v): if m: tmp.append(m) sequences[k] = tmp self._log.debug('[seq] found sequence {} at pos {}'.format(tmp, k)) # count the changes in sequences via the operators, e.g., a && b || c full_cc = 0 for k, v in sequences.items(): cc = 1 op = v[0] for i, binop in enumerate(v[1:]): prefix = None if len(binop) > 2: # we have a prefix prefix = '!' binop = binop[1:] if binop != op: cc += 1 elif prefix: # binop == op but binop has prefix cc += 1 op = binop full_cc += cc self._log.debug('[seq] cc for sequence {} at {} is {}'.format(v, k, cc)) return full_cc def _nesting(self, method): cc = 0 for path, n in method: nest = 0 # MethodDeclaration does not count so we can do this here for p in path: if type(p).__name__ in JAVA_NESTING_TYPES: nest += 1 self._log.debug('[nest] {} in {} increase nesting level to {}'.format(p, 'JAVA_NESTING_TYPES', nest)) if type(p) == [] and type(p[0]).__name__ in JAVA_NESTING_TYPES: nest += 1 self._log.debug('[nest] {} in {} increase nesting level to {}'.format(p[0], 'JAVA_NESTING_TYPES', nest)) nesting = nest if type(n).__name__ in JAVA_BRANCH_TYPES + ['TryStatement']: cc += 1 + nesting self._log.debug('[nest] {} in {} increase cc to {} including nesting {}'.format(n, 'JAVA_BRANCH_TYPES and Try', cc, nesting)) return cc