#! /usr/bin/python3
# simple check if two build or source graphs are equal
#
# two graphs are equal if they contain the same set of nodes and edges

from __future__ import print_function
import sys
sys.path.append('/usr/share/botch')
from util import read_graph


def graph_difference(g, h, verbose=False):
    if g.number_of_edges() != h.number_of_edges():
        print("g has %d edges and h as %d edges" %
              (g.number_of_edges(), h.number_of_edges()))
        return False

    if g.number_of_nodes() != h.number_of_nodes():
        print("g has %d nodes and h as %d nodes" %
              (g.number_of_nodes(), h.number_of_nodes()))
        return False

    def normalize_node(attr):
        # We delete the cudfversion attribute because vertices should be unique
        # by their name and version and not by the cudfversion which may have
        # been differently assigned
        if attr.get('version') is not None and 'cudfversion' in attr:
            del attr['cudfversion']
        if attr.get('kind') == "SCC":
            if not attr.get('binaries'):
                raise Exception("nodes of kind SCC need the sources attribute")
            attr['sources'] = frozenset(
                [s for s in attr['sources'].split(',')])
        elif attr.get('kind') == "InstSet":
            if not attr.get('binaries'):
                raise Exception(
                    "nodes of kind InstSet need the binaries attribute")
            attr['binaries'] = frozenset(
                [p for p in attr['binaries'].split(',')])
        elif attr.get('kind') == "SrcPkg":
            pass
        return frozenset(attr.items())

    def normalize_edge(attr):
        # we delete the id attribute as it has no meaning. Edges are unique by
        # the vertices they connect because there can be no multi-edges
        if 'id' in attr:
            del attr['id']
        # only buildgraphs have the kind attribute
        if attr.get('kind'):
            if attr['kind'] == "buildsfrom":
                if not attr.get('binaries'):
                    raise Exception(
                        "edges of kind buildsfrom need the binaries attribute")
                attr['binaries'] = frozenset(
                    [s for s in attr['binaries'].split(',')])
            elif attr['kind'] == "builddep":
                pass
            else:
                raise Exception("unknown edge type %s" % attr['kind'])
        else:
            # this is a srcgraph
            if attr.get('binaries'):
                attr['binaries'] = frozenset(
                    [s for s in attr['binaries'].split(',')])
            if attr.get('strong'):
                attr['strong'] = frozenset(
                    [s for s in attr['strong'].split(',')])
            if attr.get('strong_direct'):
                attr['strong_direct'] = frozenset(
                    [s for s in attr['strong_direct'].split(',')])
        if attr.get("annotation"):
            attr['annotation'] = frozenset(
                [s for s in attr['annotation'].split(',')])
        return frozenset(attr.items())

    g_nodes = set([(n, normalize_node(attr))
                  for n, attr in g.nodes(data=True)])
    h_nodes = set([(n, normalize_node(attr))
                  for n, attr in h.nodes(data=True)])
    g_edges = set([(n1, n2, normalize_edge(attr))
                  for n1, n2, attr in g.edges(data=True)])
    h_edges = set([(n1, n2, normalize_edge(attr))
                  for n1, n2, attr in h.edges(data=True)])

    if h_nodes == g_nodes and g_edges == h_edges:
        return True

    print("the graphs disagree on either the node naming, their values or " +
          "the graph structure", file=sys.stderr)

    # if this test did not succeed, then maybe just the node names are
    # different but their content actually matches:

    g_node_vals = set([a for _, a in g_nodes])
    h_node_vals = set([a for _, a in h_nodes])

    if g_node_vals != h_node_vals:
        print("the set of node attributes of g and h differ")
        if g_node_vals - h_node_vals:
            print("nodes in g but not in h", g_node_vals - h_node_vals)
        if h_node_vals - g_node_vals:
            print("nodes in h but not in g", h_node_vals - g_node_vals)
        return False

    if (len(g_nodes) != len(g_node_vals) or len(h_nodes) != len(h_node_vals)):
        print("there exist duplicate node attributes - this should not " +
              "happen for either build or source graphs", file=sys.stderr)
        return False

    print("the graphs disagree on either the node naming or the graph " +
          "structure", file=sys.stderr)

    # we now know that the set of node attributes is equal and that there are
    # no duplicates
    # thus we can now create a mapping between node ids of the two graphs

    # this dictionary stores for each attribute its node id in h
    node_mapping_h = {attr: n for n, attr in h_nodes}

    # this dictionary stores for each node id in g the node id in h
    node_mapping = {n: node_mapping_h[attr] for n, attr in g_nodes}

    # now go over all edges in g and check if they are in h as well
    # also check whether the attributes are equal
    # we do not check the other way round because we know that the number of
    # edges in both graphs is equal
    for n1, n2, attr in g_edges:
        n3 = node_mapping[n1]
        n4 = node_mapping[n2]
        if not h.has_edge(n3, n4):
            print("edge in g but not in h", file=sys.stderr)
            print(g.adj[n1][n2])
            return False
        if attr != frozenset(h.adj[n3][n4].items()):
            print("edge attributes do not agree", file=sys.stderr)
            print(attr)
            print(h.adj[n3][n4])
            return False

    return True


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description=("Check if two graphs are the same"))
    parser.add_argument(
        "g", type=read_graph, help="graph g in GraphML or dot format")
    parser.add_argument(
        "h", type=read_graph, help="graph h in GraphML or dot format")
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='be verbose')
    args = parser.parse_args()
    # TODO: also support graphs that are neither buildgraph nor srcgraph
    ret = graph_difference(args.g, args.h, args.verbose)
    exit(not ret)
