Graph Cuts

Code
import networkx as nx
import matplotlib.pyplot as plt
from itertools import combinations
from io import BytesIO
from PIL import Image
import gradio as gr
import pandas as pd

1 Introduction

Graph-based clustering is a useful way to break a graph into groups (or clusters) where the nodes in each group are closely linked. One popular technique for this is called a graph cut, which divides the graph into separate groups by cutting the fewest necessary connections (or edges).

In this notebook, we focus on normalized graph cuts. This approach not only splits the graph but also considers the entire connectivity of each group to keep the groups balanced.

We show how this works by visualizing and examining the partitioning process on a simple graph using normalized cuts.

2 Python Implementation

We start by defining a function to create a graph and visualize its partitions.

Code
def create_graph() -> nx.Graph:
    """Create and return the graph with weights."""
    G = nx.Graph()
    edges = [(1, 2, 3), (1, 3, 1), (2, 4, 1), (3, 4, 3)]
    G.add_weighted_edges_from(edges)
    return G


def visualize_partition(
    G: nx.Graph,
    partition: list[set[int]],
    ncut_value: float,
    combination_num: int | str,
    total_combinations: int,
) -> Image.Image:
    """Enhanced visualization with professional styling"""
    plt.figure(figsize=(10, 8), dpi=100)

    pos = {
        1: (0, 0),  # bottom (v1)
        2: (-1, 1),  # left (v2)
        3: (1, 1),  # right (v3)
        4: (0, 2),  # top (v4)
    }

    colors = ["#1f77b4" if node in partition[0] else "#ff7f0e" for node in G.nodes()]

    nx.draw_networkx_nodes(
        G, pos, node_color=colors, node_size=1200, edgecolors="black", linewidths=2
    )

    nx.draw_networkx_edges(G, pos, width=2, alpha=0.8, edge_color="gray")

    edge_labels = nx.get_edge_attributes(G, "weight")
    nx.draw_networkx_edge_labels(
        G,
        pos,
        edge_labels=edge_labels,
        font_size=12,
        label_pos=0.75,
        bbox=dict(facecolor="white", edgecolor="none", alpha=0.9),
    )

    labels = {node: f"v{node}" for node in G.nodes()}
    nx.draw_networkx_labels(
        G, pos, labels, font_size=14, font_weight="bold", font_family="sans-serif"
    )

    plt.title(
        f"Partition Analysis: Combination {combination_num}/{total_combinations}\n"
        f"NCut Value: {ncut_value:.4f}",
        fontsize=14,
        pad=20,
    )

    plt.margins(0.15)
    buf = BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight")
    buf.seek(0)
    image = Image.open(buf).convert("RGB")
    plt.close()
    return image

The calculate_normalized_cut function calculates the normalized cut for a given partition, which helps us find the optimal partition with the minimal normalized cut value.

Code
def calculate_normalized_cut(
    G: nx.Graph, subset1: set[int], subset2: set[int]
) -> float:
    """Calculate normalized cut value for given partition"""
    cut_value = sum(G[u][v]["weight"] for u in subset1 for v in subset2 if v in G[u])

    vol1 = sum(dict(G.degree(weight="weight"))[v] for v in subset1)
    vol2 = sum(dict(G.degree(weight="weight"))[v] for v in subset2)

    if vol1 == 0 or vol2 == 0:
        return float("inf")

    return (cut_value / vol1) + (cut_value / vol2)

3 Interactive Dashboard

The dashboard below visualizes all possible partitions and identifies the optimal one based on the normalized cut value.

Code
def display_all_combinations_gradio(
    G: nx.Graph,
) -> tuple[list[tuple[Image.Image, str]], pd.DataFrame, dict]:
    """
    Returns:
        gallery_items: List of (image, caption)
        results_df: DataFrame with all results
        best_partition_info: Dictionary with best partition details
    """
    vertices = set(G.nodes())
    all_results = []

    for size in range(1, len(vertices)):
        for subset1 in combinations(vertices, size):
            subset1_set = set(subset1)
            subset2 = vertices - subset1_set
            ncut = calculate_normalized_cut(G, subset1_set, subset2)

            all_results.append(
                {
                    "partition_1": sorted(subset1_set),
                    "partition_2": sorted(subset2),
                    "ncut": ncut,
                    "cut_value": sum(
                        G[u][v]["weight"]
                        for u in subset1_set
                        for v in subset2
                        if v in G[u]
                    ),
                }
            )

    results_df = pd.DataFrame(all_results)
    results_df["combination"] = results_df.index + 1
    results_df = results_df.sort_values("ncut").reset_index(drop=True)

    best = results_df.iloc[0]
    best_partition_info = {
        "partitions": [best["partition_1"], best["partition_2"]],
        "ncut": best["ncut"],
        "cut_value": best["cut_value"],
    }

    gallery_items = []
    for idx, row in results_df.iterrows():
        caption = (
            f"Combination {idx+1}\n"
            f"NCut: {row['ncut']:.4f}\n"
            f"Partitions: {row['partition_1']} | {row['partition_2']}"
        )
        img = visualize_partition(
            G,
            [set(row["partition_1"]), set(row["partition_2"])],
            row["ncut"],
            idx + 1,
            len(results_df),
        )
        gallery_items.append((img, caption))

    return gallery_items, results_df, best_partition_info


with gr.Blocks(
    css="""gradio-app {background: #222222 !important}""",
    title="Graph Partition Analysis",
) as demo:
    with gr.Row():
        graph_img = gr.Image(label="Base Graph", interactive=False)
        best_partition_img = gr.Image(label="Best Partition", interactive=False)

    with gr.Row():
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.TabItem("All Partitions"):
                    gallery = gr.Gallery(
                        label="Partition Visualizations",
                        show_label=True,
                        columns=3,
                        rows=4,
                        object_fit="contain",
                        height="auto",
                    )

                with gr.TabItem("Analysis Results"):
                    results_table = gr.Dataframe(
                        headers=[
                            "Combination",
                            "Partition 1",
                            "Partition 2",
                            "NCut Value",
                            "Cut Value",
                        ],
                        datatype=["number", "str", "str", "number", "number"],
                        interactive=False,
                        wrap=True,
                    )

    best_partition_details = gr.JSON(label="Optimization Results")

    # Define the main analysis function
    def run_analysis():
        G = create_graph()

        # Visualize base graph
        base_img = visualize_partition(G, [set(), set(G.nodes())], 0, 0, 0)

        # Get results
        gallery_items, results_df, best_info = display_all_combinations_gradio(G)

        # Format results for display
        display_df = results_df[
            ["combination", "partition_1", "partition_2", "ncut", "cut_value"]
        ]
        display_df.columns = [
            "Combination",
            "Partition 1",
            "Partition 2",
            "NCut Value",
            "Cut Value",
        ]

        # Create best partition visualization
        best_img = visualize_partition(
            G, best_info["partitions"], best_info["ncut"], "Best", len(results_df)
        )

        return [
            base_img,
            gallery_items,
            display_df,
            best_img,
            {
                "Normalized Cut Value": best_info["ncut"],
                "Cut Value": best_info["cut_value"],
                "Partitions": best_info["partitions"],
            },
        ]

    demo.load(
        fn=run_analysis,
        outputs=[
            graph_img,
            gallery,
            results_table,
            best_partition_img,
            best_partition_details,
        ],
    )
Code
demo.launch(pwa=True, show_api=False, show_error=True)
Code
# Output of this cell set dynamically in Quarto filter step
import micropip await micropip.install('plotly==5.24.1'); import networkx as nx import matplotlib.pyplot as plt from itertools import combinations from io import BytesIO from PIL import Image import gradio as gr import pandas as pd def create_graph() -> nx.Graph: """Create and return the graph with weights.""" G = nx.Graph() edges = [(1, 2, 3), (1, 3, 1), (2, 4, 1), (3, 4, 3)] G.add_weighted_edges_from(edges) return G def visualize_partition( G: nx.Graph, partition: list[set[int]], ncut_value: float, combination_num: int | str, total_combinations: int, ) -> Image.Image: """Enhanced visualization with professional styling""" plt.figure(figsize=(10, 8), dpi=100) pos = { 1: (0, 0), # bottom (v1) 2: (-1, 1), # left (v2) 3: (1, 1), # right (v3) 4: (0, 2), # top (v4) } colors = ["#1f77b4" if node in partition[0] else "#ff7f0e" for node in G.nodes()] nx.draw_networkx_nodes( G, pos, node_color=colors, node_size=1200, edgecolors="black", linewidths=2 ) nx.draw_networkx_edges(G, pos, width=2, alpha=0.8, edge_color="gray") edge_labels = nx.get_edge_attributes(G, "weight") nx.draw_networkx_edge_labels( G, pos, edge_labels=edge_labels, font_size=12, label_pos=0.75, bbox=dict(facecolor="white", edgecolor="none", alpha=0.9), ) labels = {node: f"v{node}" for node in G.nodes()} nx.draw_networkx_labels( G, pos, labels, font_size=14, font_weight="bold", font_family="sans-serif" ) plt.title( f"Partition Analysis: Combination {combination_num}/{total_combinations}\n" f"NCut Value: {ncut_value:.4f}", fontsize=14, pad=20, ) plt.margins(0.15) buf = BytesIO() plt.savefig(buf, format="png", bbox_inches="tight") buf.seek(0) image = Image.open(buf).convert("RGB") plt.close() return image def calculate_normalized_cut( G: nx.Graph, subset1: set[int], subset2: set[int] ) -> float: """Calculate normalized cut value for given partition""" cut_value = sum(G[u][v]["weight"] for u in subset1 for v in subset2 if v in G[u]) vol1 = sum(dict(G.degree(weight="weight"))[v] for v in subset1) vol2 = sum(dict(G.degree(weight="weight"))[v] for v in subset2) if vol1 == 0 or vol2 == 0: return float("inf") return (cut_value / vol1) + (cut_value / vol2) def display_all_combinations_gradio( G: nx.Graph, ) -> tuple[list[tuple[Image.Image, str]], pd.DataFrame, dict]: """ Returns: gallery_items: List of (image, caption) results_df: DataFrame with all results best_partition_info: Dictionary with best partition details """ vertices = set(G.nodes()) all_results = [] for size in range(1, len(vertices)): for subset1 in combinations(vertices, size): subset1_set = set(subset1) subset2 = vertices - subset1_set ncut = calculate_normalized_cut(G, subset1_set, subset2) all_results.append( { "partition_1": sorted(subset1_set), "partition_2": sorted(subset2), "ncut": ncut, "cut_value": sum( G[u][v]["weight"] for u in subset1_set for v in subset2 if v in G[u] ), } ) results_df = pd.DataFrame(all_results) results_df["combination"] = results_df.index + 1 results_df = results_df.sort_values("ncut").reset_index(drop=True) best = results_df.iloc[0] best_partition_info = { "partitions": [best["partition_1"], best["partition_2"]], "ncut": best["ncut"], "cut_value": best["cut_value"], } gallery_items = [] for idx, row in results_df.iterrows(): caption = ( f"Combination {idx+1}\n" f"NCut: {row['ncut']:.4f}\n" f"Partitions: {row['partition_1']} | {row['partition_2']}" ) img = visualize_partition( G, [set(row["partition_1"]), set(row["partition_2"])], row["ncut"], idx + 1, len(results_df), ) gallery_items.append((img, caption)) return gallery_items, results_df, best_partition_info with gr.Blocks( css="""gradio-app {background: #222222 !important}""", title="Graph Partition Analysis", ) as demo: with gr.Row(): graph_img = gr.Image(label="Base Graph", interactive=False) best_partition_img = gr.Image(label="Best Partition", interactive=False) with gr.Row(): with gr.Column(scale=3): with gr.Tabs(): with gr.TabItem("All Partitions"): gallery = gr.Gallery( label="Partition Visualizations", show_label=True, columns=3, rows=4, object_fit="contain", height="auto", ) with gr.TabItem("Analysis Results"): results_table = gr.Dataframe( headers=[ "Combination", "Partition 1", "Partition 2", "NCut Value", "Cut Value", ], datatype=["number", "str", "str", "number", "number"], interactive=False, wrap=True, ) best_partition_details = gr.JSON(label="Optimization Results") # Define the main analysis function def run_analysis(): G = create_graph() # Visualize base graph base_img = visualize_partition(G, [set(), set(G.nodes())], 0, 0, 0) # Get results gallery_items, results_df, best_info = display_all_combinations_gradio(G) # Format results for display display_df = results_df[ ["combination", "partition_1", "partition_2", "ncut", "cut_value"] ] display_df.columns = [ "Combination", "Partition 1", "Partition 2", "NCut Value", "Cut Value", ] # Create best partition visualization best_img = visualize_partition( G, best_info["partitions"], best_info["ncut"], "Best", len(results_df) ) return [ base_img, gallery_items, display_df, best_img, { "Normalized Cut Value": best_info["ncut"], "Cut Value": best_info["cut_value"], "Partitions": best_info["partitions"], }, ] demo.load( fn=run_analysis, outputs=[ graph_img, gallery, results_table, best_partition_img, best_partition_details, ], ) demo.launch(pwa=True, show_api=False, show_error=True)