#! /usr/bin/python3

from __future__ import print_function
import networkx as nx
import sys
import gzip
from collections import defaultdict


def calcportsmetric(g, verbose=False):
    """Compute, for each source package name in g, the set of source
    package names that become compilable (transitively) through it.

    g is a directed dependency graph whose vertices carry a 'name'
    attribute (the version-less source package name).  Returns a dict
    mapping each source package name to the set of names (including its
    own) collected from everything reachable against the edge direction.

    NOTE(review): written against the networkx 1.x API -- ``g.node`` /
    ``h.node`` attribute dicts and a list-returning ``predecessors()``.
    Under networkx >= 2, ``predecessors()`` is an iterator, so the
    ``if not pred`` test below would misbehave; confirm the pinned
    networkx version before upgrading.
    """
    if verbose:
        print("calculating sccs", file=sys.stderr)
    scc = list(nx.strongly_connected_components(g))
    if verbose:
        print("condensing", file=sys.stderr)
    # Collapse each strongly connected component into one vertex so that
    # the remaining graph is a DAG which can be folded bottom-up.
    h = nx.condensation(g, scc)
    if verbose:
        print("get roots", file=sys.stderr)
    i = h.reverse()
    # Post-order on the reversed DAG visits every predecessor (in h) of
    # a node before the node itself, so each node's 'k' set can be built
    # from already-computed predecessor sets in a single pass.
    for node in nx.dfs_postorder_nodes(i):
        pred = h.predecessors(node)
        # do not include the version so that source packages which exist in
        # multiple versions are only counted once
        mynames = set([g.node[n]['name'] for n in h.node[node]['members']])
        if not pred:
            h.node[node]['k'] = mynames
        else:
            # union of this SCC's names with all predecessor result sets
            h.node[node]['k'] = set.union(
                mynames, *[h.node[n]['k'] for n in pred])
    # # slow and stupid version of the above (to check the fast version for
    # # correctness)
    # for node in h.nodes():
    # print scc[node], [scc[n] for n in nx.dfs_tree(i,node).nodes()]
    #    descendents = nx.dfs_tree(i,node).nodes()
    #    if descendents:
    #        h.node[node]['k'] = set.union(*[set([g.node[v]['name']
    #                                             for v in scc[n]])
    #                                        for n in descendents])
    #    else:
    #        h.node[node]['k'] = set([g.node[n]['name'] for n in scc[node]])
    if verbose:
        print("getting order", file=sys.stderr)
    # we want the results per source package, so we need to merge the values
    # for those source packages which exist in different versions
    srcmetrics = defaultdict(set)
    for n in h.nodes():
        a = h.node[n]['k']
        for node in h.node[n]['members']:
            srcmetrics[g.node[node]['name']].update(a)
    return srcmetrics


if __name__ == '__main__':
    # Command-line entry point: read two GraphML dependency graphs,
    # compute the ports metric on each, optionally weight the results by
    # popcon installation counts, and print one tab-separated line per
    # source package present in both graphs.
    import argparse
    from contextlib import closing
    parser = argparse.ArgumentParser(
        description=("Calculate a metric for source package importance " +
                     "based on how many source packages (minimum and " +
                     "maximum) become compilable in a bootstrapping " +
                     "scenario through the compilability of a given source " +
                     "package"))
    parser.add_argument(
        'strongsrcgraph', metavar='strongsrcgraph.xml', type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source " +
             "package vertices with the source packages that build their " +
             "strong dependencies.")
    parser.add_argument(
        'closuresrcgraph', metavar='closuresrcgraph.xml', type=nx.read_graphml,
        help="A dependency graph in GraphML format connecting source " +
             "package vertices with the source packages that build their " +
             "dependency closure.")
    parser.add_argument('--online', action='store_true',
                        help="Retrieve popcon results for source packages")
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='be verbose')
    args = parser.parse_args()
    strongres = calcportsmetric(args.strongsrcgraph, args.verbose)
    closureres = calcportsmetric(args.closuresrcgraph, args.verbose)

    if args.online:
        if args.verbose:
            print(
                "getting popcon results for source packages", file=sys.stderr)
        if args.verbose:
            print("downloading...", file=sys.stderr)
        try:
            # Python 3: urlopen lives in urllib.request
            import urllib.request as urllib
        except ImportError:
            # Python 2 fallback; only an ImportError should trigger it,
            # anything else is a real bug and must propagate
            import urllib
        from io import BytesIO
        # close the HTTP response deliberately; closing() handles both
        # the Python 2 and Python 3 response objects
        with closing(urllib.urlopen(
                "http://popcon.debian.org/source/by_inst.gz")) as resp:
            r = BytesIO(resp.read())
        if args.verbose:
            print("done downloading, opening...", file=sys.stderr)
        popconvalues = dict()
        with gzip.GzipFile(fileobj=r) as popconf:
            # parse popcon file: skip comments and malformed rows, map
            # source package name (field 2) to its install count (field 3)
            for line in popconf:
                if line.startswith(b'#'):
                    continue
                tokens = line.split()
                if len(tokens) != 7:
                    continue
                popconvalues[tokens[1].decode()] = int(tokens[2])

        # now replace every entry by a tuple of its length and the sum of their
        # popcon values
        result = dict()
        for k, v in list(strongres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            strongres[k] = (len(v), psum)
        for k, v in list(closureres.items()):
            psum = sum([popconvalues.get(n, 0) for n in v])
            closureres[k] = (len(v), psum)

        if args.verbose:
            print("printing results", file=sys.stderr)
        # we take the set intersection because in case a package is not
        # installable, then there exist no strong dependencies for it but its
        # dependency closure will still be calculated. So in that case there
        # may be packages which are in the closure graph but not in the source
        # graph. We only want those packages that are in both graphs because
        # we don't care about the subgraphs of uninstallable packages.
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print("src:%s\t%d\t%d\t%d\t%d" % (srcname,
                                              strongres[srcname][0],
                                              closureres[srcname][0],
                                              strongres[srcname][1],
                                              closureres[srcname][1]))
    else:
        if args.verbose:
            print("printing results", file=sys.stderr)
        # offline: only the set sizes, no popcon weighting
        for srcname in sorted(set(strongres.keys()) & set(closureres.keys())):
            print("src:%s\t%d\t%d" %
                  (srcname, len(strongres[srcname]), len(closureres[srcname])))
