Source code for oat_python.hypergraph

"""
Tools for working with hypergraphs.
"""

import copy
from fractions import Fraction
import numpy as np
import networkx as nx
import scipy

from typing import Dict, List, Any







[docs] def unique_elements(L): """ Returns a list of the unique elements of a list L. """ unique_elements = [] for l in L: if not l in unique_elements: unique_elements.append(l) return unique_elements
[docs] def unique_values(key_val_iter): """ Returns a pair of lists U and P such that P[i] equals the set of all keys with value U[i], and U has no repeated entries. """ unique_values = [] key_partition = [] for k, v in key_val_iter: duplicate_v = False for ind, val in enumerate(unique_values): if v == val: key_partition[ind].append(k) duplicate_v = True break if not duplicate_v: unique_values.append(v) key_partition.append([k]) return unique_values, key_partition
[docs] def containment_map( list_of_collections ): """ Given a list of colections with pairwise-empty intersections, denoted `list_of_collections`, return the dictionary `D` such that :math:`k \\in list_of_collections[ D[k] ]` for all `k`. """ return { k: n for n, collection in enumerate(list_of_collections) for k in collection }
[docs] def transpose(L): """ Transpose a list of lists of integers, L, producing a list of lists P such that L[i][j] = P[j][i] """ transpose = [] for lnum, l in enumerate(L): for v in l: while len(transpose) < v + 1: transpose.append([]) transpose[v].append(lnum) return transpose
# ========================================================= # RELABELING # =========================================================
[docs] def relabel( hgdict: Dict[Any, List[Any]] ): """ Relabel the nodes and edges of a hypergraph with consecutive integers. Given a hypergraph represented as a dictionary of lists, where each list is sorted in strictly ascending order, this function returns a copy of the hypergraph with nodes and edges relabeled with integers, and a dictionary containing the relabeling information. Parameters ---------- hgdict : dict of list A hypergraph H represented by a dictionary of lists. Each list must be sorted in strictly ascending order. Returns ------- H : list of list of int A copy of `hgdict` with nodes and edges relabeled with integers; formatted as a list of sorted lists. T : dict A dictionary containing relabeling information with the following keys: - 'new_edge_for_old_edge': maps original edge labels to new integer labels - 'new_node_for_old_node': maps original node labels to new integer labels - 'old_edge_for_new_edge': maps new integer edge labels to original edge labels - 'old_node_for_new_node': maps new integer node labels to original node labels Notes ----- - `T["new_edge_for_old_edge"][old_edge]` gives the integer label of the edge in `H` corresponding to `old_edge`. - The other entries of `T` are defined similarly. """ hgdict = copy.deepcopy(hgdict) # Check if the input is a dictionary if not isinstance(hgdict, dict): raise TypeError("Input must be a dictionary of sorted lists.") # Check if each value in the dictionary is a sorted list for edge_label, edge_vertices in hgdict.items(): # Check if the value is a list if not isinstance(edge_vertices, list): raise TypeError(f"The value for key '{edge_label}' must be a list.") # Check if the list is sorted for i in range(len(edge_vertices) - 1): if not (edge_vertices[i] < edge_vertices[i + 1]): raise ValueError(f"The vertex list for hyperedge '{edge_label}' must be sorted in strictly ascending order, however edge {edge_label} has consecutive vertices {edge_vertices[i]} and {edge_vertices[i + 1]}.") # relabel nodes by integers (this does note remove "parallel" vertices, meaning nodes that are in the exact same set of hyperedges) vertices = [x for l in hgdict.values() for x in l] nvl2ovl = sorted( list(set(vertices)) ) # new vertex label 2 old vertex label; sorting means that the map preserves order ovl2nvl = { key: ordinal for (ordinal,key) in enumerate(nvl2ovl) } # old vertex label 2 new vertex label nel2oel = list( hgdict.keys() ) oel2nel = { k: p for p, k in enumerate( nel2oel ) } hypergraph = [ [ovl2nvl[v] for v in l ] for k, l in hgdict.items() ] translator = dict( new_node_for_old_node = ovl2nvl, new_edge_for_old_edge = oel2nel, old_node_for_new_node = nvl2ovl, old_edge_for_new_edge = nel2oel, ) return hypergraph, translator
# ========================================================= # REDUCTION # =========================================================
[docs] def reduce_hypergraph_with_labels( hgdict ): """ Given a hypergraph H represented by a dictionary of lists D, - regard D as a binary relation B, and let B' be the relation obtained by deleting duplicate rows and columns - relabel the rows and columns of B' with integers - let H be the corresponding hypergraph, represented as a list of lists of integers Every node (respectively, edge) of B maps to a unique row (respectively, edge) of B'. Let T be a dictionary with keys `new_edge_for_old_edge` `new_node_for_old_node` `old_edges_for_new_edge` `old_nodes_for_new_node` and require `T["new_edge_for_old_edge"][old_edge]` is the integer label of the edge in `H` corresponding to `old_edge`. The other entries of T are defined similarly. Return H, T """ hgdict = copy.deepcopy(hgdict) # remove duplicate entries in each edge for k in hgdict.keys(): hgdict[k] = list(set(hgdict[k])) # relabel nodes by integers (this does note remove "parallel" vertices, meaning nodes that are in the exact same set of hyperedges) vertices = [x for l in hgdict.values() for x in l] nvl2ovl = list(set(vertices)) # new vertex label 2 old vertex label ovl2nvl = { key: ordinal for (ordinal,key) in enumerate(nvl2ovl) } # old vertex label 2 new vertex label hg = { k: [ovl2nvl[v] for v in l ] for k, l in hgdict.items() } # sort the nodes in each hyperedge, and remove duplicate nodes (this is not the same as removing parallel nodes) for key,val in hg.items(): val = list(set(val)) val.sort() hg[key] = val # unique_hyperedge_slices = the set of all sets of vertices realized by a hyperedge (without repeats; by contrast, in theory there could be multiple hyperedges with the same vertex sets) # hyperedge_label_partition[i] = the set of all hyperedge labels with vertex set unique_hyperedge_slices[i] unique_hyperedge_slices, hyperedge_label_partition = unique_values(hg.items()) unique_hypernode_slices, hypernode_label_partition \ = unique_values( enumerate( transpose( unique_hyperedge_slices ) ) ) reduced_hg = transpose( unique_hypernode_slices ) # hypernode_label_partition = [ [nvl2ovl[i] for l in hypernode_label_partition for i in l] ] nvl2nnvl = containment_map( hypernode_label_partition ) # new vertex labels to new new vertex labels new_node_for_old_node = { k: nvl2nnvl[v] for k, v in ovl2nvl.items() } new_edge_for_old_edge = { k: n for n, partition in enumerate(hyperedge_label_partition) for k in partition } old_edges_for_new_edge = hyperedge_label_partition old_nodes_for_new_node = [ [nvl2ovl[x] for x in partition] for partition in hypernode_label_partition ] translator = dict( new_node_for_old_node = new_node_for_old_node, new_edge_for_old_edge = new_edge_for_old_edge, old_nodes_for_new_node = old_nodes_for_new_node, old_edges_for_new_edge = old_edges_for_new_edge, ) return reduced_hg, translator
[docs] def reduce_hypergraph(hg): """ Given a list of lists (regarded as the sparsity pattern of a sparse 0-1 matrix), returns a list of lists representing the sparse matrix obtained by deleting duplicate rows and columns """ return transpose( unique_elements( transpose( np.unique( hg, axis=0 ) ) ) )
# ========================================================= # EDGE CONTAINMENT # =========================================================
[docs] def edge_containment_graph( L ): """ Given a length-m list of lists L, returns a networkx **directed** graph G with vertex set 0, .., m-1, where there exists a directed edge i->j iff L[i] is a subset of L[j] """ m = len(L) S = [set(x) for x in L] G = nx.Digraph() for vertex in range(m): G.add_node(vertex) for row in range(m): for col in range(m): if S[row].issubset(S[col]): G.add_edge( row, col ) return G
[docs] def edge_containment_graph_symmetrized( L ): """ Given a length-m list of lists L, returns a networkx **undirected** graph G with vertex set 0, .., m-1, where i and j are adjacent iff L[i] contains L[j] or vice-versa """ m = len(L) S = [set(x) for x in L] G = nx.Graph() for vertex in range(m): G.add_node(vertex) for row in range(m): for col in range(m): if S[row].issubset(S[col]): G.add_edge( row, col ) return G
[docs] def edge_containment_relation( L ): """ Given a length-m list of lists L, returns an m x m matrix adjacency matrix `adj` of type `scipy.sparse.csr_matrix` such that - adj[i][j] = 1 if L[j] contains L[i] - adj[i][j] = 0, otherwise """ m = len(L) G = [set(x) for x in L] # initialized the sparse data of an identity matrix rows = list(range(m)) cols = list(range(m)) data = [1 for _ in range(m)] # fill the matrix for row in range(m): for col in range(m): if row == col: continue # we've already taken care of these entries if G[row].issubset(G[col]): rows.append(row) cols.append(col) data.append(1) return scipy.sparse.csr_matrix((data, (rows, cols)), shape=(m, m))
[docs] def edge_containment_relation_symmetrized( L ): """ Given a length-m list of lists L, returns an m x m adjacency matrix `adj` of type `scipy.sparse.csr_matrix`, such that - adj[i][j] = 1 if L[i] contains L[j] or vice versa - adj[i][j] = 0, otherwise """ m = len(L) G = [set(x) for x in L] # initialized the sparse data of an identity matrix rows = list(range(m)) cols = list(range(m)) data = [1 for _ in range(m)] # fill the matrix for row in range(m): for col in range(row+1,m): if G[row].issubset(G[col]): # add an entry rows.append(row) cols.append(col) data.append(1) # and its transpose rows.append(col) cols.append(row) data.append(1) return scipy.sparse.csr_matrix((data, (rows, cols)), shape=(m, m))
[docs] def networkx_nerve_for_hypergraph( L ): """ Given a length-m list of lists L, returns a networkx graph G with - m edges - an undirected edge {i,j} iff L[i] is a subset of L[j], or vice versa """ m = len(L) S = [set(x) for x in L] G = nx.Graph() for i in range(m): G.add_node(i) for i in range(m): for j in range(i+1,m): if S[i].issubset(S[j]) or S[j].issubset(S[i]): G.add_edge(i,j) return G