Source code for oat_python.hypergraph

"""
Tools for working with hypergraphs.
"""

import copy
from fractions import Fraction
import numpy as np
import networkx as nx
import scipy

from typing import Dict, List, Any








[docs]
def unique_elements(L):
    """
    Returns a list of the unique elements of a list L.
    """
    unique_elements = []
    for l in L:
        if not l in unique_elements:
            unique_elements.append(l)
    return unique_elements




[docs]
def unique_values(key_val_iter):
    """
    Returns a pair of lists U and P such that P[i] equals 
    the set of all keys with value U[i], and U has no 
    repeated entries.
    """
    unique_values = []
    key_partition = []
    for k, v in key_val_iter:
        duplicate_v = False
        for ind, val in enumerate(unique_values):
            if v == val:
                key_partition[ind].append(k)
                duplicate_v = True
                break
        if not duplicate_v:
            unique_values.append(v)
            key_partition.append([k])
    return unique_values, key_partition



[docs]
def containment_map( list_of_collections ):
    """
    Given a list of colections with pairwise-empty intersections, denoted `list_of_collections`, return the dictionary `D` such that :math:`k \\in list_of_collections[ D[k] ]` for all `k`.
    """
    return { k: n for n, collection in enumerate(list_of_collections) for k in collection }



[docs]
def transpose(L):
    """
    Transpose a list of lists of integers, L, producing a list of lists P
    such that L[i][j] = P[j][i]
    """
    transpose = []
    for lnum, l in enumerate(L):
        for v in l:
            while len(transpose) < v + 1:
                transpose.append([])
            transpose[v].append(lnum)
    return transpose




#   =========================================================
#   RELABELING
#   =========================================================



[docs]
def relabel( hgdict: Dict[Any, List[Any]] ):
    """
    Relabel the nodes and edges of a hypergraph with consecutive integers.

    Given a hypergraph represented as a dictionary of lists, where each list is sorted in strictly ascending order,
    this function returns a copy of the hypergraph with nodes and edges relabeled with integers, and a dictionary
    containing the relabeling information.

    Parameters
    ----------
    hgdict : dict of list
        A hypergraph H represented by a dictionary of lists. Each list must be sorted in strictly ascending order.

    Returns
    -------
    H : list of list of int
        A copy of `hgdict` with nodes and edges relabeled with integers; formatted as a list of sorted lists.
    T : dict
        A dictionary containing relabeling information with the following keys:

            - 'new_edge_for_old_edge': maps original edge labels to new integer labels
            - 'new_node_for_old_node': maps original node labels to new integer labels
            - 'old_edge_for_new_edge': maps new integer edge labels to original edge labels
            - 'old_node_for_new_node': maps new integer node labels to original node labels

    Notes
    -----
    
    - `T["new_edge_for_old_edge"][old_edge]` gives the integer label of the edge in `H` corresponding to `old_edge`.
    - The other entries of `T` are defined similarly.
    """
    hgdict = copy.deepcopy(hgdict)

    # Check if the input is a dictionary
    if not isinstance(hgdict, dict):
        raise TypeError("Input must be a dictionary of sorted lists.")
    
    # Check if each value in the dictionary is a sorted list
    for edge_label, edge_vertices in hgdict.items():
        # Check if the value is a list
        if not isinstance(edge_vertices, list):
            raise TypeError(f"The value for key '{edge_label}' must be a list.")

        # Check if the list is sorted
        for i in range(len(edge_vertices) - 1):
            if not (edge_vertices[i] < edge_vertices[i + 1]):
                raise ValueError(f"The vertex list for hyperedge '{edge_label}' must be sorted in strictly ascending order, however edge {edge_label} has consecutive vertices {edge_vertices[i]} and {edge_vertices[i + 1]}.")        

    # relabel nodes by integers (this does note remove "parallel" vertices, meaning nodes that are in the exact same set of hyperedges)
    vertices    =   [x for l in hgdict.values() for x in l]
    nvl2ovl     =   sorted( list(set(vertices)) ) # new vertex label 2 old vertex label; sorting means that the map preserves order
    ovl2nvl     =   { key: ordinal for (ordinal,key) in enumerate(nvl2ovl) } # old vertex label 2 new vertex label

    nel2oel     =   list( hgdict.keys() )
    oel2nel     =   { k: p for p, k in enumerate( nel2oel ) }

    hypergraph  =   [ [ovl2nvl[v] for v in l ] for k, l in hgdict.items() ]
   
    translator  =   dict(
                            new_node_for_old_node    =   ovl2nvl,                            
                            new_edge_for_old_edge    =   oel2nel,
                            old_node_for_new_node    =   nvl2ovl,                             
                            old_edge_for_new_edge    =   nel2oel,
                        )
    
    return hypergraph, translator



#   =========================================================
#   REDUCTION
#   =========================================================



[docs]
def reduce_hypergraph_with_labels( hgdict ):
    """
    Given a hypergraph H represented by a dictionary of lists D, 
    - regard D as a binary relation B, and let B' be the relation obtained by deleting duplicate rows and columns
    - relabel the rows and columns of B' with integers
    - let H be the corresponding hypergraph, represented as a list of lists of integers
    Every node (respectively, edge) of B maps to a unique row (respectively, edge) of B'.  Let T be a dictionary with keys 
    `new_edge_for_old_edge`
    `new_node_for_old_node`
    `old_edges_for_new_edge`
    `old_nodes_for_new_node`
    and require `T["new_edge_for_old_edge"][old_edge]` is the integer label of the edge in `H` corresponding to `old_edge`.  The other entries of T are defined similarly.
    
    Return H, T
    """
    hgdict = copy.deepcopy(hgdict)

    # remove duplicate entries in each edge
    for k in hgdict.keys():
        hgdict[k] = list(set(hgdict[k]))  

    # relabel nodes by integers (this does note remove "parallel" vertices, meaning nodes that are in the exact same set of hyperedges)
    vertices = [x for l in hgdict.values() for x in l]
    nvl2ovl = list(set(vertices)) # new vertex label 2 old vertex label
    ovl2nvl  = { key: ordinal for (ordinal,key) in enumerate(nvl2ovl) } # old vertex label 2 new vertex label     
    hg = { k: [ovl2nvl[v] for v in l ] for k, l in hgdict.items() }
    
    # sort the nodes in each hyperedge, and remove duplicate nodes (this is not the same as removing parallel nodes)
    for key,val in hg.items():
        val = list(set(val))
        val.sort()
        hg[key] = val
    
    # unique_hyperedge_slices = the set of all sets of vertices realized by a hyperedge (without repeats; by contrast, in theory there could be multiple hyperedges with the same vertex sets)
    # hyperedge_label_partition[i] = the set of all hyperedge labels with vertex set unique_hyperedge_slices[i]
    unique_hyperedge_slices, hyperedge_label_partition = unique_values(hg.items())
    
    unique_hypernode_slices, hypernode_label_partition \
        = unique_values( enumerate( transpose( unique_hyperedge_slices ) ) )
    
    reduced_hg = transpose( unique_hypernode_slices )
    
    # hypernode_label_partition = [ [nvl2ovl[i] for l in hypernode_label_partition for i in l] ]
    
    nvl2nnvl             = containment_map( hypernode_label_partition ) # new vertex labels to new new vertex labels
    new_node_for_old_node = { k: nvl2nnvl[v] for k, v in ovl2nvl.items() }    
    new_edge_for_old_edge = { k: n for n, partition in enumerate(hyperedge_label_partition) for k in partition }

    old_edges_for_new_edge   =   hyperedge_label_partition
    old_nodes_for_new_node   =   [ [nvl2ovl[x] for x in partition] for partition in hypernode_label_partition ]

    translator  =   dict(
                            new_node_for_old_node    =   new_node_for_old_node, 
                            new_edge_for_old_edge    =   new_edge_for_old_edge,
                            old_nodes_for_new_node   =   old_nodes_for_new_node,
                            old_edges_for_new_edge   =   old_edges_for_new_edge,
                        )
    
    return reduced_hg, translator

    

[docs]
def reduce_hypergraph(hg):
    """
    Given a list of lists (regarded as the sparsity pattern of
    a sparse 0-1 matrix), returns a list of lists
    representing the sparse matrix obtained by deleting
    duplicate rows and columns
    """
    return transpose( unique_elements( transpose( np.unique( hg, axis=0 ) ) ) )



#   =========================================================
#   EDGE CONTAINMENT
#   =========================================================



[docs]
def edge_containment_graph( L ):
    """
    Given a length-m list of lists L, returns a networkx **directed** graph G with
    vertex set 0, .., m-1, where there exists a directed edge i->j iff L[i] is a subset
    of L[j]
    """
    m = len(L)    
    S = [set(x) for x in L]
    G = nx.Digraph()
    for vertex in range(m): G.add_node(vertex)
    for row in range(m):
        for col in range(m):
            if S[row].issubset(S[col]):
                G.add_edge( row, col )
    return G



[docs]
def edge_containment_graph_symmetrized( L ):
    """
    Given a length-m list of lists L, returns a networkx **undirected** graph G with
    vertex set 0, .., m-1, where i and j are adjacent iff L[i] contains L[j] or
    vice-versa
    """
    m = len(L)    
    S = [set(x) for x in L]
    G = nx.Graph()
    for vertex in range(m): G.add_node(vertex)    
    for row in range(m):
        for col in range(m):
            if S[row].issubset(S[col]):
                G.add_edge( row, col )
    return G



[docs]
def edge_containment_relation( L ):
    """
    Given a length-m list of lists L, returns an m x m matrix adjacency matrix `adj` of
    type `scipy.sparse.csr_matrix` such that
    - adj[i][j] = 1 if L[j] contains L[i]
    - adj[i][j] = 0, otherwise
    """
    m = len(L)
    G = [set(x) for x in L]
    
    # initialized the sparse data of an identity matrix
    rows    =   list(range(m))
    cols    =   list(range(m))
    data    =   [1 for _ in range(m)]

    # fill the matrix
    for row in range(m):
        for col in range(m):
            if row == col:
                continue # we've already taken care of these entries
            if G[row].issubset(G[col]):
                rows.append(row)
                cols.append(col)
                data.append(1)
    return scipy.sparse.csr_matrix((data, (rows, cols)), shape=(m, m))



[docs]
def edge_containment_relation_symmetrized( L ):
    """
    Given a length-m list of lists L, returns an m x m adjacency matrix `adj` of
    type `scipy.sparse.csr_matrix`, such that
    - adj[i][j] = 1 if L[i] contains L[j] or vice versa
    - adj[i][j] = 0, otherwise
    """
    m = len(L)
    G = [set(x) for x in L]

    # initialized the sparse data of an identity matrix
    rows    =   list(range(m))
    cols    =   list(range(m))
    data    =   [1 for _ in range(m)]

    # fill the matrix
    for row in range(m):
        for col in range(row+1,m):
            if G[row].issubset(G[col]):
                # add an entry
                rows.append(row)
                cols.append(col)
                data.append(1)
                # and its transpose
                rows.append(col)
                cols.append(row)
                data.append(1)                
    return scipy.sparse.csr_matrix((data, (rows, cols)), shape=(m, m))



[docs]
def networkx_nerve_for_hypergraph( L ):
    """
    Given a length-m list of lists L, returns a networkx graph G with
    - m edges
    - an undirected edge {i,j} iff L[i] is a subset of L[j], or vice versa
    """
    m = len(L)
    S = [set(x) for x in L]    
    G = nx.Graph()   
    for i in range(m):
        G.add_node(i)
    for i in range(m):
        for j in range(i+1,m):
            if S[i].issubset(S[j]) or S[j].issubset(S[i]):
                G.add_edge(i,j)
    return G