Map k spanning tree

FlorentinD · FlorentinD · commit 84191e4595ad · 2025-10-30T17:09:26.000+01:00
diff --git a/graphdatascience/procedure_surface/api/pathfinding/k_spanning_tree_endpoints.py b/graphdatascience/procedure_surface/api/pathfinding/k_spanning_tree_endpoints.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from graphdatascience.procedure_surface.api.base_result import BaseResult
+from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
+
+
+class KSpanningTreeWriteResult(BaseResult):  # typed record returned by KSpanningTreeEndpoints.write
+    effective_node_count: int  # presumably the number of nodes that ended up in the tree — TODO confirm
+    write_millis: int  # presumably time spent writing results back, in milliseconds
+    post_processing_millis: int  # presumably post-processing time, in milliseconds
+    pre_processing_millis: int  # presumably pre-processing time, in milliseconds
+    compute_millis: int  # presumably algorithm compute time, in milliseconds
+    configuration: dict[str, Any]  # presumably the configuration the run used — TODO confirm
+
+
+class KSpanningTreeEndpoints(ABC):
+    @abstractmethod
+    def write(
+        self,
+        G: GraphV2,
+        k: int,
+        write_property: str,
+        source_node: int,
+        relationship_weight_property: str | None = None,
+        objective: str = "minimum",
+        relationship_types: list[str] | None = None,
+        node_labels: list[str] | None = None,
+        sudo: bool = False,
+        log_progress: bool = True,
+        username: str | None = None,
+        concurrency: int | None = None,
+        job_id: str | None = None,
+        write_concurrency: int | None = None,
+    ) -> KSpanningTreeWriteResult:
+        """
+        Runs the k-Spanning tree algorithm and writes the result back to the Neo4j database.
+
+        Parameters
+        ----------
+        G : GraphV2
+            The graph to run the algorithm on.
+        k : int
+            The size of the tree to compute, i.e. the number of nodes it should contain.
+        write_property : str
+            The name of the node property to write the result to.
+        source_node : int
+            The id of the node to use as the root of the spanning tree.
+        relationship_weight_property : str, optional
+            The name of the relationship property to use as weights.
+        objective : str, default="minimum"
+            The objective function to optimize. Either "minimum" or "maximum".
+        relationship_types : list[str], optional
+            Filter to only use relationships of specific types.
+        node_labels : list[str], optional
+            Filter to only use nodes with specific labels.
+        sudo : bool, default=False
+            Whether to run with elevated privileges.
+        log_progress : bool, default=True
+            Whether to log progress during execution.
+        username : str, optional
+            The username to use for logging.
+        concurrency : int, optional
+            The number of threads to use for parallel computation.
+        job_id : str, optional
+            An optional job ID for tracking the operation.
+        write_concurrency : int, optional
+            The number of threads to use for writing results.
+
+        Returns
+        -------
+        KSpanningTreeWriteResult
+            Result containing statistics and timing information.
+        """
+        ...
diff --git a/graphdatascience/procedure_surface/arrow/pathfinding/k_spanning_tree_arrow_endpoints.py b/graphdatascience/procedure_surface/arrow/pathfinding/k_spanning_tree_arrow_endpoints.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from graphdatascience.arrow_client.authenticated_flight_client import AuthenticatedArrowClient
+from graphdatascience.arrow_client.v2.remote_write_back_client import RemoteWriteBackClient
+from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
+from graphdatascience.procedure_surface.api.pathfinding.k_spanning_tree_endpoints import (
+    KSpanningTreeEndpoints,
+    KSpanningTreeWriteResult,
+)
+from graphdatascience.procedure_surface.arrow.node_property_endpoints import NodePropertyEndpointsHelper
+
+
+class KSpanningTreeArrowEndpoints(KSpanningTreeEndpoints):
+    """k-spanning-tree endpoints served through an Arrow client via `NodePropertyEndpointsHelper`."""
+
+    def __init__(
+        self,
+        arrow_client: AuthenticatedArrowClient,
+        write_back_client: RemoteWriteBackClient | None = None,
+        show_progress: bool = False,
+    ):
+        self._endpoints_helper = NodePropertyEndpointsHelper(
+            arrow_client, write_back_client=write_back_client, show_progress=show_progress
+        )
+
+    def write(
+        self,
+        G: GraphV2,
+        k: int,
+        write_property: str,
+        source_node: int,
+        relationship_weight_property: str | None = None,
+        objective: str = "minimum",
+        relationship_types: list[str] | None = None,
+        node_labels: list[str] | None = None,
+        sudo: bool = False,
+        log_progress: bool = True,
+        username: str | None = None,
+        concurrency: int | None = None,
+        job_id: str | None = None,
+        write_concurrency: int | None = None,
+    ) -> KSpanningTreeWriteResult:
+        algo_settings = {
+            "k": k,
+            "sourceNode": source_node,
+            "relationshipWeightProperty": relationship_weight_property,
+            "objective": objective,
+            "relationshipTypes": relationship_types,
+            "nodeLabels": node_labels,
+            "sudo": sudo,
+            "logProgress": log_progress,
+            "username": username,
+            "concurrency": concurrency,
+            "jobId": job_id,
+            "writeConcurrency": write_concurrency,
+        }
+        job_config = self._endpoints_helper.create_base_config(G, **algo_settings)
+        write_stats = self._endpoints_helper.run_job_and_write(
+            "v2/pathfinding.kSpanningTree",
+            G,
+            job_config,
+            property_overwrites={write_property: write_property},
+            write_concurrency=write_concurrency,
+            concurrency=None,  # NOTE(review): `concurrency` is only sent via the job config — confirm intended
+        )
+        return KSpanningTreeWriteResult(**write_stats)
diff --git a/graphdatascience/procedure_surface/cypher/pathfinding/k_spanning_tree_cypher_endpoints.py b/graphdatascience/procedure_surface/cypher/pathfinding/k_spanning_tree_cypher_endpoints.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from graphdatascience.call_parameters import CallParameters
+from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
+from graphdatascience.procedure_surface.api.pathfinding.k_spanning_tree_endpoints import (
+    KSpanningTreeEndpoints,
+    KSpanningTreeWriteResult,
+)
+from graphdatascience.procedure_surface.utils.config_converter import ConfigConverter
+from graphdatascience.query_runner.query_runner import QueryRunner
+
+
+class KSpanningTreeCypherEndpoints(KSpanningTreeEndpoints):
+    """k-spanning-tree endpoints that call the `gds.kSpanningTree.write` procedure through a query runner."""
+
+    def __init__(self, query_runner: QueryRunner):
+        self._query_runner = query_runner
+
+    def write(
+        self,
+        G: GraphV2,
+        k: int,
+        write_property: str,
+        source_node: int,
+        relationship_weight_property: str | None = None,
+        objective: str = "minimum",
+        relationship_types: list[str] | None = None,
+        node_labels: list[str] | None = None,
+        sudo: bool = False,
+        log_progress: bool = True,
+        username: str | None = None,
+        concurrency: int | None = None,
+        job_id: str | None = None,
+        write_concurrency: int | None = None,
+    ) -> KSpanningTreeWriteResult:
+        gds_settings = {
+            "k": k,
+            "writeProperty": write_property,
+            "sourceNode": source_node,
+            "relationshipWeightProperty": relationship_weight_property,
+            "objective": objective,
+            "relationshipTypes": relationship_types,
+            "nodeLabels": node_labels,
+            "sudo": sudo,
+            "logProgress": log_progress,
+            "username": username,
+            "concurrency": concurrency,
+            "jobId": job_id,
+            "writeConcurrency": write_concurrency,
+        }
+        gds_config = ConfigConverter.convert_to_gds_config(**gds_settings)
+        call_params = CallParameters(graph_name=G.name(), config=gds_config)
+        call_params.ensure_job_id_in_config()
+        # presumably a single result row: squeeze() reduces it to one record that unpacks as keyword arguments
+        rows = self._query_runner.call_procedure("gds.kSpanningTree.write", params=call_params, logging=log_progress)
+        return KSpanningTreeWriteResult(**rows.squeeze())
diff --git a/graphdatascience/session/session_v2_endpoints.py b/graphdatascience/session/session_v2_endpoints.py
@@ -33,6 +33,7 @@
 from graphdatascience.procedure_surface.api.node_embedding.hashgnn_endpoints import HashGNNEndpoints
 from graphdatascience.procedure_surface.api.node_embedding.node2vec_endpoints import Node2VecEndpoints
 from graphdatascience.procedure_surface.api.pathfinding.all_shortest_path_endpoints import AllShortestPathEndpoints
+from graphdatascience.procedure_surface.api.pathfinding.k_spanning_tree_endpoints import KSpanningTreeEndpoints
 from graphdatascience.procedure_surface.api.pathfinding.prize_steiner_tree_endpoints import PrizeSteinerTreeEndpoints
 from graphdatascience.procedure_surface.api.pathfinding.shortest_path_endpoints import ShortestPathEndpoints
 from graphdatascience.procedure_surface.api.pathfinding.spanning_tree_endpoints import SpanningTreeEndpoints
@@ -90,6 +91,9 @@
 from graphdatascience.procedure_surface.arrow.pathfinding.all_shortest_path_arrow_endpoints import (
     AllShortestPathArrowEndpoints,
 )
+from graphdatascience.procedure_surface.arrow.pathfinding.k_spanning_tree_arrow_endpoints import (
+    KSpanningTreeArrowEndpoints,
+)
 from graphdatascience.procedure_surface.arrow.pathfinding.prize_steiner_tree_arrow_endpoints import (
     PrizeSteinerTreeArrowEndpoints,
 )
@@ -218,6 +222,12 @@ def kmeans(self) -> KMeansEndpoints:
     def knn(self) -> KnnEndpoints:
         return KnnArrowEndpoints(self._arrow_client, self._write_back_client, show_progress=self._show_progress)
 
+    @property
+    def k_spanning_tree(self) -> KSpanningTreeEndpoints:
+        """Return k-spanning-tree endpoints wired to this object's Arrow and write-back clients."""
+        client, write_back = self._arrow_client, self._write_back_client
+        return KSpanningTreeArrowEndpoints(client, write_back, show_progress=self._show_progress)
+
     @property
     def label_propagation(self) -> LabelPropagationEndpoints:
         return LabelPropagationArrowEndpoints(
diff --git a/graphdatascience/tests/integrationV2/procedure_surface/arrow/pathfinding/test_k_spanning_tree_arrow_endpoints.py b/graphdatascience/tests/integrationV2/procedure_surface/arrow/pathfinding/test_k_spanning_tree_arrow_endpoints.py
@@ -0,0 +1,73 @@
+from typing import Generator
+
+import pytest
+
+from graphdatascience.arrow_client.authenticated_flight_client import AuthenticatedArrowClient
+from graphdatascience.arrow_client.v2.remote_write_back_client import RemoteWriteBackClient
+from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
+from graphdatascience.procedure_surface.arrow.pathfinding.k_spanning_tree_arrow_endpoints import (
+    KSpanningTreeArrowEndpoints,
+)
+from graphdatascience.query_runner.query_runner import QueryRunner
+from graphdatascience.tests.integrationV2.procedure_surface.arrow.graph_creation_helper import (
+    create_graph_from_db,
+)
+
+graph = """
+        CREATE
+            (a: Node {id: 0}),
+            (b: Node {id: 1}),
+            (c: Node {id: 2}),
+            (d: Node {id: 3}),
+            (e: Node {id: 4}),
+            (f: Node {id: 5}),
+            (a)-[:LINK {cost: 1.0}]->(b),
+            (a)-[:LINK {cost: 1.0}]->(c),
+            (b)-[:LINK {cost: 1.0}]->(d),
+            (c)-[:LINK {cost: 1.0}]->(e),
+            (d)-[:LINK {cost: 1.0}]->(f),
+            (e)-[:LINK {cost: 1.0}]->(f)
+        """  # six-node fixture: two 3-hop routes a-b-d-f and a-c-e-f; every LINK has cost 1.0
+
+
+@pytest.fixture
+def db_graph(arrow_client: AuthenticatedArrowClient, query_runner: QueryRunner) -> Generator[GraphV2, None, None]:
+    """Yield graph "g", built by `create_graph_from_db` from the module-level `graph` statement.
+
+    The projection copies node and relationship properties; all relationship types are passed as undirected.
+    """
+    remote_projection = """
+                    MATCH (source)-[r]->(target)
+                    WITH gds.graph.project.remote(source, target, {
+                        sourceNodeProperties: properties(source),
+                        targetNodeProperties: properties(target),
+                        relationshipProperties: properties(r)
+                    }) as g
+                    RETURN g
+                """
+    with create_graph_from_db(
+        arrow_client, query_runner, "g", graph, remote_projection, undirected_relationship_types=["*"]
+    ) as projected:
+        yield projected
+
+
+@pytest.mark.db_integration
+def test_k_spanning_tree_write(
+    arrow_client: AuthenticatedArrowClient, query_runner: QueryRunner, db_graph: GraphV2
+) -> None:
+    """Run the write mode with k=3 from node 0 and check the node count and non-negative timings."""
+    write_back = RemoteWriteBackClient(arrow_client, query_runner)
+    endpoints = KSpanningTreeArrowEndpoints(arrow_client, write_back_client=write_back)
+    stats = endpoints.write(
+        G=db_graph,
+        k=3,
+        write_property="weight",
+        source_node=0,
+        relationship_weight_property="cost",
+    )
+
+    assert stats.effective_node_count == 3
+    assert stats.write_millis >= 0
+    assert stats.compute_millis >= 0
+    assert stats.pre_processing_millis >= 0
+    assert stats.post_processing_millis >= 0
diff --git a/graphdatascience/tests/integrationV2/procedure_surface/cypher/pathfinding/test_k_spanning_tree_cypher_endpoints.py b/graphdatascience/tests/integrationV2/procedure_surface/cypher/pathfinding/test_k_spanning_tree_cypher_endpoints.py
@@ -0,0 +1,71 @@
+from typing import Generator
+
+import pytest
+
+from graphdatascience import QueryRunner
+from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
+from graphdatascience.procedure_surface.cypher.pathfinding.k_spanning_tree_cypher_endpoints import (
+    KSpanningTreeCypherEndpoints,
+)
+from graphdatascience.tests.integrationV2.procedure_surface.cypher.cypher_graph_helper import create_graph
+from graphdatascience.tests.integrationV2.procedure_surface.node_lookup_helper import find_node_by_name
+
+
+@pytest.fixture
+def sample_graph(query_runner: QueryRunner) -> Generator[GraphV2, None, None]:
+    """Yield graph "g" as produced by `create_graph` from a small six-node fixture.
+
+    The fixture has two three-hop routes from A to F (A-B-D-F and A-C-E-F), every LINK costing 1.0;
+    the projection passes all relationship types as undirected.
+    """
+    node_setup_cypher = """
+    CREATE
+    (a: Node {name: 'A'}),
+    (b: Node {name: 'B'}),
+    (c: Node {name: 'C'}),
+    (d: Node {name: 'D'}),
+    (e: Node {name: 'E'}),
+    (f: Node {name: 'F'}),
+    (a)-[:LINK {cost: 1.0}]->(b),
+    (a)-[:LINK {cost: 1.0}]->(c),
+    (b)-[:LINK {cost: 1.0}]->(d),
+    (c)-[:LINK {cost: 1.0}]->(e),
+    (d)-[:LINK {cost: 1.0}]->(f),
+    (e)-[:LINK {cost: 1.0}]->(f)
+    """
+
+    projection_cypher = """
+        MATCH (source)-[r]->(target)
+        WITH gds.graph.project('g', source, target, {
+            relationshipProperties: properties(r)
+        }, {undirectedRelationshipTypes: ['*']}) AS G
+        RETURN G
+    """
+
+    with create_graph(query_runner, "g", node_setup_cypher, projection_cypher) as projected:
+        yield projected
+
+
+@pytest.fixture
+def k_spanning_tree_endpoints(query_runner: QueryRunner) -> Generator[KSpanningTreeCypherEndpoints, None, None]:
+    yield KSpanningTreeCypherEndpoints(query_runner)  # Cypher endpoints under test; nothing follows the yield
+
+
+def test_k_spanning_tree_write(
+    k_spanning_tree_endpoints: KSpanningTreeCypherEndpoints, sample_graph: GraphV2, query_runner: QueryRunner
+) -> None:
+    """Root the tree at node 'A', request three nodes and check the returned statistics."""
+    root_id = find_node_by_name(query_runner, "A")
+    stats = k_spanning_tree_endpoints.write(
+        G=sample_graph,
+        k=3,
+        write_property="weight",
+        source_node=root_id,
+        relationship_weight_property="cost",
+    )
+
+    assert stats.effective_node_count == 3
+    assert stats.write_millis >= 0
+    assert stats.compute_millis >= 0
+    assert stats.pre_processing_millis >= 0
+    assert stats.post_processing_millis >= 0
diff --git a/graphdatascience/tests/integrationV2/procedure_surface/session/test_session_endpoint_coverage.py b/graphdatascience/tests/integrationV2/procedure_surface/session/test_session_endpoint_coverage.py