Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions src/datajoint/diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,10 @@ def cascade(cls, table_expr, part_integrity="enforce"):
>>> # Inspect the cascade subgraph
>>> dj.Diagram.cascade(Session & 'subject_id=1')
"""
# Validate eagerly (mirrors Table.delete) — a typo like "casade" would
# otherwise silently degrade to enforce/ignore behavior.
if part_integrity not in ("enforce", "ignore", "cascade"):
raise ValueError(f"part_integrity must be 'enforce', 'ignore', or 'cascade', got {part_integrity!r}")
conn = table_expr.connection
conn.dependencies.load_all_downstream()
node = table_expr.full_table_name
Expand Down Expand Up @@ -698,7 +702,14 @@ def _propagate_restrictions(self, start_node, mode, part_integrity="enforce"):
# Only propagate through descendants of start_node
allowed_nodes = {start_node} | set(nx.descendants(self, start_node))
propagated_edges = set()
visited_masters = set()
# Dedup upward part→master walks by (part, master) PAIR, not by master
# alone: distinct Parts of the same master can be restricted via
# different FK paths from the seed, and each must contribute its own
# master rows. Keying by master alone under-restricted the master when
# a second Part was reached later (silent integrity violation in
# part_integrity="cascade" mode). The pair set is finite and global
# across passes, so the multi-pass loop still terminates.
visited_part_master_pairs = set()

restrictions = self._cascade_restrictions if mode == "cascade" else self._restrict_conditions

Expand All @@ -708,8 +719,8 @@ def _propagate_restrictions(self, start_node, mode, part_integrity="enforce"):
# Trigger the upward propagation explicitly for the seed. See #1429.
if part_integrity == "cascade" and mode == "cascade":
seed_master = extract_master(start_node)
if seed_master and seed_master in self.nodes() and seed_master not in visited_masters:
visited_masters.add(seed_master)
if seed_master and seed_master in self.nodes() and (start_node, seed_master) not in visited_part_master_pairs:
visited_part_master_pairs.add((start_node, seed_master))
if self._propagate_part_to_master(start_node, seed_master, mode, restrictions):
allowed_nodes.add(seed_master)
allowed_nodes.update(nx.descendants(self, seed_master))
Expand Down Expand Up @@ -777,10 +788,16 @@ def _propagate_restrictions(self, start_node, mode, part_integrity="enforce"):
# rules at each edge. Handles Part-of-Part chains and
# renamed FKs (via .proj()), unlike the prior implementation
# which assumed shared PK attribute names. See #1429.
# Deduped per (part, master) pair — every restricted Part
# of a master contributes its own master rows.
if part_integrity == "cascade" and mode == "cascade":
master_name = extract_master(target)
if master_name and master_name in self.nodes() and master_name not in visited_masters:
visited_masters.add(master_name)
if (
master_name
and master_name in self.nodes()
and (target, master_name) not in visited_part_master_pairs
):
visited_part_master_pairs.add((target, master_name))
propagated = self._propagate_part_to_master(target, master_name, mode, restrictions)
if propagated:
allowed_nodes.add(master_name)
Expand Down
7 changes: 7 additions & 0 deletions src/datajoint/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,13 @@ def drop(self, prompt: bool | None = None, part_integrity: str = "enforce"):
- ``"enforce"`` (default): Error if parts would be dropped without masters.
- ``"ignore"``: Allow dropping parts without masters.
"""
# Validate eagerly: "cascade" is delete-only (re-deriving rows makes no
# sense for drop), and a typo would otherwise silently behave as "ignore".
if part_integrity not in ("enforce", "ignore"):
raise ValueError(
f"part_integrity must be 'enforce' or 'ignore' for drop "
f"('cascade' applies to delete only), got {part_integrity!r}"
)
if self.restriction:
raise DataJointError(
"A table with an applied restriction cannot be dropped. " "Call drop() on the unrestricted Table."
Expand Down
100 changes: 100 additions & 0 deletions tests/integration/test_cascade_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,3 +479,103 @@ class PartB(dj.Part):
assert Master().fetch1("master_id") == 2
assert len(Master.PartA()) == 1
assert len(Master.PartB()) == 1


# =========================================================================
# Post-2.3 audit: with part_integrity="cascade", EVERY restricted Part of a
# master must contribute its rows to the master's restriction. The upward
# walk used to be deduplicated per master (first Part wins), silently
# under-restricting the master when a second Part of the same master was
# reached via a different FK path.
# =========================================================================


def test_cascade_two_parts_same_master_both_restrict(schema_by_backend):
"""Two Parts of one master, each referencing an external table. Deleting
the external row reaches PartA (under master 1) and PartB (under master 2);
the cascade must restrict BOTH masters, not just the first Part's."""

@schema_by_backend
class Ext(dj.Manual):
definition = """
ext_id : int32
"""

@schema_by_backend
class Master(dj.Manual):
definition = """
master_id : int32
"""

class PartA(dj.Part):
definition = """
-> master
-> Ext
"""

class PartB(dj.Part):
definition = """
-> master
-> Ext
"""

Ext.insert([(1,), (2,)])
Master.insert([(1,), (2,), (3,)])
Master.PartA.insert([(1, 1), (3, 2)]) # ext 1 under master 1; control under master 3
Master.PartB.insert([(2, 1), (3, 2)]) # ext 1 under master 2; control under master 3

counts = dj.Diagram.cascade(Ext & {"ext_id": 1}, part_integrity="cascade").counts()

# Both masters must be in the cascade — master 1 (via PartA) AND master 2
# (via PartB). The old once-per-master dedup returned 1 here.
assert counts.get(Master.full_table_name, 0) == 2, (
f"both masters must be restricted (PartA -> master 1, PartB -> master 2); "
f"got {counts.get(Master.full_table_name)}: the upward walk fired for "
f"only one Part of the master."
)

# End-to-end delete: both compositional units removed; master 3 untouched.
(Ext & {"ext_id": 1}).delete(prompt=False, part_integrity="cascade")

remaining_masters = {r["master_id"] for r in Master.to_dicts()}
assert remaining_masters == {3}, f"masters 1 and 2 must be deleted; got {remaining_masters}"
assert len(Master.PartA()) == 1 # only the control row (3, 2)
assert len(Master.PartB()) == 1
assert len(Ext & {"ext_id": 2}) == 1 # unrelated ext row intact


def test_diagram_cascade_rejects_invalid_part_integrity(schema_by_backend):
"""Diagram.cascade validates part_integrity eagerly (mirrors Table.delete)
— a typo must raise, not silently degrade to enforce/ignore behavior."""

@schema_by_backend
class Simple(dj.Manual):
definition = """
sid : int32
"""

Simple.insert([(1,)])

with pytest.raises(ValueError, match="part_integrity must be"):
dj.Diagram.cascade(Simple & {"sid": 1}, part_integrity="casade")


def test_drop_rejects_invalid_part_integrity(schema_by_backend):
"""Table.drop validates part_integrity: 'cascade' is delete-only and typos
must raise instead of silently behaving as 'ignore'. The table survives."""

@schema_by_backend
class Keeper(dj.Manual):
definition = """
kid : int32
"""

Keeper.insert([(1,)])

with pytest.raises(ValueError, match="part_integrity must be"):
Keeper.drop(part_integrity="cascade")
with pytest.raises(ValueError, match="part_integrity must be"):
Keeper.drop(part_integrity="enforcee")

# validation fired before any drop work — table intact
assert len(Keeper()) == 1
58 changes: 58 additions & 0 deletions tests/integration/test_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,61 @@ class Standalone(dj.Manual):
assert len(trace[Standalone]) == 1
counts = trace.counts()
assert counts == {Standalone.full_table_name: 1}


def test_trace_stops_at_master_no_part_down_collection(schema_by_backend):
"""Pins shipped merge-table semantics: trace walks ancestor FK edges only.
It does NOT descend from an ancestor Master into that Master's Parts — an
ancestor's Part is included only when the Part itself lies on an FK path
to the seed. In the merge shape Parent -> Master.P -> Master -> Child,
trace(Child & key) reaches Master but neither Master.P nor Parent.

This corrects the design comment on datajoint/datajoint-python discussion
1232, which described a Master->Parts down-collection that was never
implemented; the spec (provenance.md, Allowed table set) matches this
test. If down-collection is ever added deliberately, this test must be
revised alongside the spec — it exists so the semantics cannot drift
silently."""

@schema_by_backend
class Parent(dj.Manual):
definition = """
parent_id : int32
"""

@schema_by_backend
class Master(dj.Manual):
definition = """
master_id : int32
"""

class P(dj.Part):
definition = """
-> master
-> Parent
"""

@schema_by_backend
class Child(dj.Manual):
definition = """
-> Master
child_id : int32
"""

Parent.insert([(5,)])
Master.insert([(10,)])
Master.P.insert([(10, 5)])
Child.insert([(10, 1000)])

trace = dj.Diagram.trace(Child & {"master_id": 10, "child_id": 1000})

# The Master is a true ancestor — reachable and correctly restricted.
assert len(trace[Master]) == 1
assert trace[Master].fetch1("master_id") == 10

# The Master's Part and the Part's parent are NOT in the trace: the Part
# is a descendant of Master, not on an ancestor path from Child.
with pytest.raises(DataJointError, match="not in this trace"):
trace[Master.P]
with pytest.raises(DataJointError, match="not in this trace"):
trace[Parent]
Loading