Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/openedx_tagging/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ class TagData(TypedDict):
value: str
external_id: str | None
child_count: int
descendant_count: int
depth: int
parent_value: str | None
# Note: usage_count may or may not be present, depending on the request.
Expand Down
46 changes: 9 additions & 37 deletions src/openedx_tagging/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def descendant_count(self) -> int:
).count()
return 0

def save(self, *args, **kwargs):
def save(self, *args, **kwargs) -> None:
"""
Compute and persist depth and lineage before saving, then cascade any changes to descendants.
"""
Expand All @@ -187,8 +187,8 @@ def save(self, *args, **kwargs):
self.lineage = self.value + "\t"
else:
if Tag.parent.is_cached(self): # pylint: disable=no-member
parent_depth = self.parent.depth
parent_lineage = self.parent.lineage
parent_depth = self.parent.depth # type: ignore[union-attr]
parent_lineage = self.parent.lineage # type: ignore[union-attr]
else:
parent_vals = Tag.objects.values("depth", "lineage").get(pk=self.parent_id)
parent_depth = parent_vals["depth"]
Expand Down Expand Up @@ -216,7 +216,9 @@ def save(self, *args, **kwargs):
}
if depth_delta != 0:
update_kwargs["depth"] = F("depth") + depth_delta
self.taxonomy.tag_set.filter(lineage__startswith=old_values["lineage"]).exclude(pk=self.pk).update(
self.taxonomy.tag_set.filter( # type: ignore[union-attr]
lineage__startswith=old_values["lineage"]
).exclude(pk=self.pk).update(
**update_kwargs
)

Expand Down Expand Up @@ -492,7 +494,6 @@ def _get_filtered_tags_free_text(
qs = qs.annotate(
depth=Value(0),
child_count=Value(0),
descendant_count=Value(0),
external_id=Value(None, output_field=models.CharField()),
parent_value=Value(None, output_field=models.CharField()),
_id=Value(None, output_field=models.CharField()),
Expand Down Expand Up @@ -524,19 +525,11 @@ def _get_filtered_tags_one_level(
qs = self.tag_set.filter(parent=None)
qs = qs.annotate(parent_value=Value(None, output_field=models.CharField()))
qs = qs.annotate(child_count=models.Count("children", distinct=True)) # type: ignore[no-redef]
# Count all descendants at any depth using depth + lineage prefix.
# depth__gt correctly excludes self; lineage prefix matches all descendants.
descendants_sq = (
self.tag_set.filter(depth__gt=models.OuterRef("depth"), lineage__startswith=models.OuterRef("lineage"))
.order_by()
.annotate(count=models.Func(F("id"), function="Count"))
)
qs = qs.annotate(descendant_count=models.Subquery(descendants_sq.values("count"))) # type: ignore[no-redef]
# Filter by search term:
if search_term:
qs = qs.filter(value__icontains=search_term)
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values("value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id")
qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id")
qs = qs.order_by("value")
if include_counts:
# We need to include the count of how many times this tag is used to tag objects.
Expand All @@ -562,11 +555,6 @@ def _get_filtered_tags_deep(
Implementation of get_filtered_tags() for closed taxonomies, where
we're including tags from multiple levels of the hierarchy.
"""
# Note: we ignore a lot of "no-redef" warnings here because we use annotations to pre-load fields like
# `child_count`, and `descendant_count` for all tags in a single query rather than computing them later for each
# Tag, with additional queries. Also, we are converting the result to a values query (that returns a dict), not
# returning actual Tag objects at the end, but mypy doesn't know that.

if parent_tag_value:
# Get a subtree below this tag:
main_parent_tag = self.tag_for_value(parent_tag_value)
Expand Down Expand Up @@ -611,34 +599,18 @@ def _get_filtered_tags_deep(
# frontend.

# Count the direct children, and annotate the result on each row as "child_count".
# The query below produces the same results as:
# qs = initial_qs.annotate(child_count=models.Count("children"))
# However, this correlated subquery avoids a JOIN + GROUP BY, and is far more efficient in practice.
# This also lets us use the same code path whether there's a search_term or not.
child_count_sq = (
initial_qs.filter(parent_id=models.OuterRef("pk"))
.order_by()
.annotate(count=models.Func(F("id"), function="Count"))
)
# Count all descendants at any depth using the lineage prefix trick.
descendants_sq = (
initial_qs.filter(
depth__gt=models.OuterRef("depth"),
lineage__startswith=models.OuterRef("lineage"),
)
.order_by()
.annotate(count=models.Func(F("id"), function="Count"))
)
qs = initial_qs.annotate( # type: ignore[no-redef]
child_count=models.Subquery(child_count_sq.values("count")),
descendant_count=models.Subquery(descendants_sq.values("count")),
)
qs = initial_qs.annotate(child_count=models.Subquery(child_count_sq.values("count"))) # type: ignore[no-redef]

# Add the parent value
qs = qs.annotate(parent_value=F("parent__value"))
qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID
qs = qs.values( # type: ignore[assignment]
"value", "child_count", "descendant_count", "depth", "parent_value", "external_id", "_id"
"value", "child_count", "depth", "parent_value", "external_id", "_id"
)
# lineage is a case-insensitive column storing "Root\tParent\t...\tThisValue\t", so
# ordering by it gives the tree sort order that we want.
Expand Down
1 change: 0 additions & 1 deletion src/openedx_tagging/rest_api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ class TagDataSerializer(UserPermissionsSerializerMixin, serializers.Serializer):
value = serializers.CharField()
external_id = serializers.CharField(allow_null=True)
child_count = serializers.IntegerField()
descendant_count = serializers.IntegerField()
depth = serializers.IntegerField()
parent_value = serializers.CharField(allow_null=True)
usage_count = serializers.IntegerField(required=False)
Expand Down
8 changes: 4 additions & 4 deletions tests/openedx_tagging/import_export/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,25 +53,25 @@ def test_import_template(self, template_file, parser_format):
'Electronic instruments (ELECTRIC) (None) (children: 2)',
' Synthesizer (SYNTH) (Electronic instruments) (children: 0)',
' Theramin (THERAMIN) (Electronic instruments) (children: 0)',
'Percussion instruments (PERCUSS) (None) (children: 3 + 6)',
'Percussion instruments (PERCUSS) (None) (children: 3)',
' Chordophone (CHORD) (Percussion instruments) (children: 1)',
' Piano (PIANO) (Chordophone) (children: 0)',
' Idiophone (BELLS) (Percussion instruments) (children: 2)',
' Celesta (CELESTA) (Idiophone) (children: 0)',
' Hi-hat (HI-HAT) (Idiophone) (children: 0)',
' Membranophone (DRUMS) (Percussion instruments) (children: 2 + 1)',
' Membranophone (DRUMS) (Percussion instruments) (children: 2)',
' Cajón (CAJÓN) (Membranophone) (children: 1)',
' Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0)',
' Tabla (TABLA) (Membranophone) (children: 0)',
'String instruments (STRINGS) (None) (children: 2 + 5)',
'String instruments (STRINGS) (None) (children: 2)',
' Bowed strings (BOW) (String instruments) (children: 2)',
' Cello (CELLO) (Bowed strings) (children: 0)',
' Violin (VIOLIN) (Bowed strings) (children: 0)',
' Plucked strings (PLUCK) (String instruments) (children: 3)',
' Banjo (BANJO) (Plucked strings) (children: 0)',
' Harp (HARP) (Plucked strings) (children: 0)',
' Mandolin (MANDOLIN) (Plucked strings) (children: 0)',
'Wind instruments (WINDS) (None) (children: 2 + 5)',
'Wind instruments (WINDS) (None) (children: 2)',
' Brass (BRASS) (Wind instruments) (children: 2)',
' Trumpet (TRUMPET) (Brass) (children: 0)',
' Tuba (TUBA) (Brass) (children: 0)',
Expand Down
22 changes: 11 additions & 11 deletions tests/openedx_tagging/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ def test_get_tags(self) -> None:
"Bacteria (children: 2)",
" Archaebacteria (children: 0)",
" Eubacteria (children: 0)",
"Eukaryota (children: 5 + 8)",
" Animalia (children: 7 + 1)",
"Eukaryota (children: 5)",
" Animalia (children: 7)",
" Arthropoda (children: 0)",
" Chordata (children: 1)",
" Mammalia (children: 0)",
Expand Down Expand Up @@ -175,7 +175,7 @@ def test_get_root_tags(self):
assert pretty_format_tags(root_life_on_earth_tags, parent=False) == [
'Archaea (children: 3)',
'Bacteria (children: 2)',
'Eukaryota (children: 5 + 8)',
'Eukaryota (children: 5)',
]

@override_settings(LANGUAGES=test_languages)
Expand Down Expand Up @@ -764,7 +764,7 @@ def get_object_tags():
" Proteoarchaeota (used: 0, children: 0)",
"Bacteria (used: 0, children: 1)", # does not contain "ar" but a child does
" Archaebacteria (used: 1, children: 0)",
"Eukaryota (used: 0, children: 1 + 2)",
"Eukaryota (used: 0, children: 1)",
" Animalia (used: 1, children: 2)", # does not contain "ar" but a child does
" Arthropoda (used: 1, children: 0)",
" Cnidaria (used: 0, children: 0)",
Expand All @@ -786,8 +786,8 @@ def get_object_tags():
"Bacteria (used: 0, children: 2)",
" Archaebacteria (used: 1, children: 0)",
" Eubacteria (used: 0, children: 0)",
"Eukaryota (used: 0, children: 4 + 8)",
" Animalia (used: 1, children: 7 + 1)",
"Eukaryota (used: 0, children: 4)",
" Animalia (used: 1, children: 7)",
" Arthropoda (used: 1, children: 0)",
" Chordata (used: 0, children: 1)",
" Mammalia (used: 0, children: 0)",
Expand Down Expand Up @@ -1107,10 +1107,10 @@ def test_deep_taxonomy(self) -> None:
assert pretty_format_tags(
tagging_api.get_tags(taxonomy)
) == [
"Root - depth 0 (None) (children: 1 + 4)",
" Child - depth 1 (Root - depth 0) (children: 1 + 3)",
" Grandchild - depth 2 (Child - depth 1) (children: 1 + 2)",
" Great-Grandchild - depth 3 (Grandchild - depth 2) (children: 1 + 1)",
"Root - depth 0 (None) (children: 1)",
" Child - depth 1 (Root - depth 0) (children: 1)",
" Grandchild - depth 2 (Child - depth 1) (children: 1)",
" Great-Grandchild - depth 3 (Grandchild - depth 2) (children: 1)",
" Great-Great-Grandchild - depth 4 (Great-Grandchild - depth 3) (children: 1)",
" Great-Great-Great-Grandchild - depth 5 (Great-Great-Grandchild - depth 4) (children: 0)",
]
Expand All @@ -1128,7 +1128,7 @@ def test_deep_taxonomy(self) -> None:
# Or even load a subtree:
deep_result = taxonomy.get_filtered_tags(depth=None, parent_tag_value=tag_2.value)
assert pretty_format_tags(deep_result, parent=False) == [
' Great-Grandchild - depth 3 (children: 1 + 1)',
' Great-Grandchild - depth 3 (children: 1)',
' Great-Great-Grandchild - depth 4 (children: 1)',
' Great-Great-Great-Grandchild - depth 5 (children: 0)',
]
Expand Down
Loading