29 changes: 29 additions & 0 deletions api/analyzers/analyzer.py
@@ -143,3 +143,32 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_

pass

@abstractmethod
def add_file_imports(self, file: File) -> None:
"""
Add import statements to the file.

Args:
file (File): The file to add imports to.
"""

pass

@abstractmethod
def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]:
"""
Resolve an import statement to entities.

Args:
files (dict[Path, File]): All files in the project.
lsp (SyncLanguageServer): The language server.
file_path (Path): The path to the file containing the import.
path (Path): The path to the project root.
import_node (Node): The import statement node.

Returns:
list[Entity]: List of resolved entities.
"""

pass

16 changes: 16 additions & 0 deletions api/analyzers/java/analyzer.py
@@ -132,3 +132,19 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
return self.resolve_method(files, lsp, file_path, path, symbol)
else:
raise ValueError(f"Unknown key {key}")

def add_file_imports(self, file: File) -> None:
"""
Extract and add import statements from the file.
Java imports are not yet implemented.
"""
# TODO: Implement Java import tracking
pass

def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]:
"""
Resolve an import statement to the entities it imports.
Java imports are not yet implemented.
"""
# TODO: Implement Java import resolution
return []
92 changes: 92 additions & 0 deletions api/analyzers/python/analyzer.py
@@ -121,3 +121,95 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
return self.resolve_method(files, lsp, file_path, path, symbol)
else:
raise ValueError(f"Unknown key {key}")

def add_file_imports(self, file: File) -> None:
"""
Extract and add import statements from the file.

Supports:
- import module
- import module as alias
- from module import name
- from module import name1, name2
- from module import name as alias
"""
try:
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Query for both import types
import_query = self.language.query("""
(import_statement) @import
(import_from_statement) @import_from
""")

captures = import_query.captures(file.tree.root_node)

# Add all import statement nodes to the file
if 'import' in captures:
for import_node in captures['import']:
file.add_import(import_node)

if 'import_from' in captures:
for import_node in captures['import_from']:
file.add_import(import_node)
except Exception as e:
logger.debug(f"Failed to extract imports from {file.path}: {e}")

def resolve_import(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, import_node: Node) -> list[Entity]:
"""
Resolve an import statement to the entities it imports.
"""
res = []

try:
if import_node.type == 'import_statement':
# Handle "import module" or "import module as alias"
# Find all dotted_name and aliased_import nodes
for child in import_node.children:
if child.type == 'dotted_name':
# Try to resolve the module/name
identifier = child.children[0] if child.child_count > 0 else child
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)
elif child.type == 'aliased_import':
# Get the actual name from aliased import (before 'as')
if child.child_count > 0:
actual_name = child.children[0]
if actual_name.type == 'dotted_name' and actual_name.child_count > 0:
identifier = actual_name.children[0]
else:
identifier = actual_name
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)
Comment on lines +166 to +184
@coderabbitai bot (Nov 2, 2025)

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify handling of dotted module imports and bare import statements.

The implementation may not correctly handle certain import patterns:

  1. Dotted imports: For import a.b.c, line 172 extracts child.children[0] which only resolves a, not the full module path a.b.c.

  2. Bare module imports: For import module (not from module import ...), the code attempts to resolve the module itself as a type/entity. This differs semantically from from module import entity, where a specific entity is imported. Consider whether bare module imports should create IMPORTS edges or be handled differently.
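
To make the first point concrete, here is a minimal sketch (not part of the PR) of what tree-sitter produces for import a.b.c, assuming the standalone tree_sitter and tree_sitter_python packages and the py-tree-sitter >= 0.22 API:

```python
# Minimal sketch, not part of the PR: how tree-sitter represents "import a.b.c".
# Assumes py-tree-sitter >= 0.22 and the tree_sitter_python grammar package.
from tree_sitter import Language, Parser
import tree_sitter_python as tspython

PY_LANGUAGE = Language(tspython.language())
parser = Parser()
parser.language = PY_LANGUAGE  # older releases use parser.set_language(PY_LANGUAGE)

tree = parser.parse(b"import a.b.c")
import_stmt = tree.root_node.children[0]   # import_statement
dotted = import_stmt.children[1]           # dotted_name covering "a.b.c"

print(dotted.children[0].text)  # b'a'     -> what children[0] resolves (first segment only)
print(dotted.text)              # b'a.b.c' -> the full module path
```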


(Verification scripts collapsed: the chain grepped the tests for dotted and bare import patterns, inspected resolve_type and the dotted_name handling in api/analyzers/python/analyzer.py, traced the resolve_import call site in api/analyzers/source_analyzer.py, and tried parsing sample imports with tree-sitter before producing the conclusion below.)


Fix dotted import resolution to capture full module paths, not just first segment.

The code at line 172 (and 180, 198, 206) uses child.children[0] to extract identifiers from dotted_name nodes. This only captures the first segment. For import a.b.c, this extracts only a instead of the full path a.b.c.

# Current behavior extracts only first child:
identifier = child.children[0] if child.child_count > 0 else child

# Should use node.text to capture full dotted path:
identifier = child  # use the entire dotted_name node, or reconstruct from text

Bare imports may not resolve as intended. Since resolve_type searches for a class_definition parent, bare module imports (e.g., import os) will likely return empty results. Verify whether this is intentional (only track entity imports, not module imports) or if module-level resolution needs different handling.

Add test coverage for dotted and bare import patterns to prevent regression, since no test cases currently exercise resolve_import logic.
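
One possible shape for that fix, sketched under the assumption that tree-sitter's Node.text carries the node's source bytes; the helper name full_dotted_path is hypothetical and not part of the PR:

```python
# Sketch only, not the PR's implementation. full_dotted_path is a hypothetical helper;
# it relies on tree-sitter's Node.text holding the node's source bytes.
from tree_sitter import Node

def full_dotted_path(node: Node) -> str:
    """Return the complete dotted module path (e.g. 'a.b.c') for a
    dotted_name or aliased_import node."""
    if node.type == 'aliased_import' and node.child_count > 0:
        node = node.children[0]           # the name before 'as'
    return node.text.decode('utf-8')      # whole path, not just the first segment

# Inside resolve_import, the dotted_name branch could then pass the whole node
# (or the reconstructed string) to resolve_type instead of child.children[0]:
#
#     if child.type == 'dotted_name':
#         module_path = full_dotted_path(child)
#         resolved = self.resolve_type(files, lsp, file_path, path, child)
```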

🤖 Prompt for AI Agents
In api/analyzers/python/analyzer.py around lines 166 to 184, change the
identifier extraction for dotted_name nodes so you pass the full dotted path to
resolve_type instead of only the first segment: use the entire dotted_name node
(or reconstruct the full path from its children text segments joined by '.') for
both plain dotted_name and aliased_import branches before calling resolve_type;
ensure resolve_type accepts/handles a dotted_name node or a reconstructed
string, add handling/clarity for bare module imports if you intend to resolve
modules (or leave behavior documented if only entity imports are desired), and
add unit tests covering multi-segment imports like "import a.b.c" and simple
imports like "import os" to prevent regressions.

@copilot FYI

@coderabbitai bot:
Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!


elif import_node.type == 'import_from_statement':
# Handle "from module import name1, name2"
# Find the 'import' keyword to know where imported names start
import_keyword_found = False
for child in import_node.children:
if child.type == 'import':
import_keyword_found = True
continue

# After 'import' keyword, dotted_name nodes are the imported names
if import_keyword_found and child.type == 'dotted_name':
# Try to resolve the imported name
identifier = child.children[0] if child.child_count > 0 else child
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)
elif import_keyword_found and child.type == 'aliased_import':
# Handle "from module import name as alias"
if child.child_count > 0:
actual_name = child.children[0]
if actual_name.type == 'dotted_name' and actual_name.child_count > 0:
identifier = actual_name.children[0]
else:
identifier = actual_name
resolved = self.resolve_type(files, lsp, file_path, path, identifier)
res.extend(resolved)

except Exception as e:
logger.debug(f"Failed to resolve import: {e}")

return res
13 changes: 13 additions & 0 deletions api/analyzers/source_analyzer.py
@@ -112,6 +112,10 @@ def first_pass(self, path: Path, files: list[Path], ignore: list[str], graph: Gr
# Walk through the AST
graph.add_file(file)
self.create_hierarchy(file, analyzer, graph)

# Extract import statements
if not analyzer.is_dependency(str(file_path)):
analyzer.add_file_imports(file)

def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
"""
@@ -141,6 +145,8 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
for i, file_path in enumerate(files):
file = self.files[file_path]
logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}')

# Resolve entity symbols
for _, entity in file.entities.items():
entity.resolved_symbol(lambda key, symbol: analyzers[file_path.suffix].resolve_symbol(self.files, lsps[file_path.suffix], file_path, path, key, symbol))
for key, symbols in entity.resolved_symbols.items():
@@ -157,6 +163,13 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
graph.connect_entities("RETURNS", entity.id, symbol.id)
elif key == "parameters":
graph.connect_entities("PARAMETERS", entity.id, symbol.id)

# Resolve file imports
for import_node in file.imports:
resolved_entities = analyzers[file_path.suffix].resolve_import(self.files, lsps[file_path.suffix], file_path, path, import_node)
for resolved_entity in resolved_entities:
file.add_resolved_import(resolved_entity)
graph.connect_entities("IMPORTS", file.id, resolved_entity.id)

def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None:
self.first_pass(path, files, [], graph)
20 changes: 20 additions & 0 deletions api/entities/file.py
@@ -21,10 +21,30 @@ def __init__(self, path: Path, tree: Tree) -> None:
self.path = path
self.tree = tree
self.entities: dict[Node, Entity] = {}
self.imports: list[Node] = []
self.resolved_imports: set[Entity] = set()

def add_entity(self, entity: Entity):
entity.parent = self
self.entities[entity.node] = entity

def add_import(self, import_node: Node):
"""
Add an import statement node to track.

Args:
import_node (Node): The import statement node.
"""
self.imports.append(import_node)

def add_resolved_import(self, resolved_entity: Entity):
"""
Add a resolved import entity.

Args:
resolved_entity (Entity): The resolved entity that is imported.
"""
self.resolved_imports.add(resolved_entity)

def __str__(self) -> str:
return f"path: {self.path}"
12 changes: 12 additions & 0 deletions tests/source_files/py_imports/module_a.py
@@ -0,0 +1,12 @@
"""Module A with a class definition."""

class ClassA:
"""A simple class in module A."""

def method_a(self):
"""A method in ClassA."""
return "Method A"

def function_a():
"""A function in module A."""
return "Function A"
11 changes: 11 additions & 0 deletions tests/source_files/py_imports/module_b.py
@@ -0,0 +1,11 @@
"""Module B that imports from module A."""

from module_a import ClassA, function_a

class ClassB(ClassA):
"""A class that extends ClassA."""

def method_b(self):
"""A method in ClassB."""
result = function_a()
return f"Method B: {result}"
67 changes: 67 additions & 0 deletions tests/test_py_imports.py
@@ -0,0 +1,67 @@
import os
import unittest
from pathlib import Path

from api import SourceAnalyzer, File, Graph


class Test_PY_Imports(unittest.TestCase):
def test_import_tracking(self):
"""Test that Python imports are tracked correctly."""
# Get test file path
current_dir = os.path.dirname(os.path.abspath(__file__))
test_path = os.path.join(current_dir, 'source_files', 'py_imports')

# Create graph and analyze
g = Graph("py_imports_test")
analyzer = SourceAnalyzer()

try:
analyzer.analyze_local_folder(test_path, g)

# Verify files were created
module_a = g.get_file('', 'module_a.py', '.py')
self.assertIsNotNone(module_a, "module_a.py should be in the graph")

module_b = g.get_file('', 'module_b.py', '.py')
self.assertIsNotNone(module_b, "module_b.py should be in the graph")

# Verify classes were created
class_a = g.get_class_by_name('ClassA')
self.assertIsNotNone(class_a, "ClassA should be in the graph")

class_b = g.get_class_by_name('ClassB')
self.assertIsNotNone(class_b, "ClassB should be in the graph")

# Verify function was created
func_a = g.get_function_by_name('function_a')
self.assertIsNotNone(func_a, "function_a should be in the graph")

# Test: module_b should have IMPORTS relationship to ClassA
# Query to check if module_b imports ClassA
query = """
MATCH (f:File {name: 'module_b.py'})-[:IMPORTS]->(c:Class {name: 'ClassA'})
RETURN c
"""
result = g._query(query, {})
self.assertGreater(len(result.result_set), 0,
"module_b.py should import ClassA")

# Test: module_b should have IMPORTS relationship to function_a
query = """
MATCH (f:File {name: 'module_b.py'})-[:IMPORTS]->(fn:Function {name: 'function_a'})
RETURN fn
"""
result = g._query(query, {})
self.assertGreater(len(result.result_set), 0,
"module_b.py should import function_a")

print("✓ Import tracking test passed")

finally:
# Cleanup: delete the test graph
g.delete()


if __name__ == '__main__':
unittest.main()