Skip to content

Commit 0b06a91

Browse files
authored
Merge pull request #5272 from Jakoma02/channel-categories-bit-masks
Add bitmask fields for ChannelMetadata categories
2 parents 17d931e + 2c92e7c commit 0b06a91

File tree

10 files changed

+392
-107
lines changed

10 files changed

+392
-107
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 3.2.24 on 2025-08-08 17:32
2+
from django.db import migrations
3+
from django.db import models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the ``categories_bitmask_0`` column to the ``channelmetadata`` model.

    The new column is a nullable, blank-able big integer that defaults to 0.
    """

    # This migration is applied on top of 0007_new_channel_metadata.
    dependencies = [("kolibri_public", "0007_new_channel_metadata")]

    operations = [
        migrations.AddField(
            model_name="channelmetadata",
            name="categories_bitmask_0",
            field=models.BigIntegerField(null=True, blank=True, default=0),
        ),
    ]

contentcuration/kolibri_public/models.py

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from django.db import models
2-
from django.db.models import F
32
from kolibri_content import base_models
43
from kolibri_content.fields import JSONField
5-
from kolibri_public.search import bitmask_fieldnames
6-
from kolibri_public.search import metadata_bitmasks
4+
from kolibri_public.search import channelmetadata_bitmask_fieldnames
5+
from kolibri_public.search import channelmetadata_metadata_bitmasks
6+
from kolibri_public.search import contentnode_bitmask_fieldnames
7+
from kolibri_public.search import contentnode_metadata_bitmasks
8+
from kolibri_public.search import has_all_labels
79
from mptt.managers import TreeManager
810
from mptt.querysets import TreeQuerySet
911

@@ -17,31 +19,7 @@ class ContentTag(base_models.ContentTag):
1719

1820
class ContentNodeQueryset(TreeQuerySet):
1921
def has_all_labels(self, field_name, labels):
20-
bitmasks = metadata_bitmasks[field_name]
21-
bits = {}
22-
for label in labels:
23-
if label in bitmasks:
24-
bitmask_fieldname = bitmasks[label]["bitmask_field_name"]
25-
if bitmask_fieldname not in bits:
26-
bits[bitmask_fieldname] = 0
27-
bits[bitmask_fieldname] += bitmasks[label]["bits"]
28-
29-
filters = {}
30-
annotations = {}
31-
for bitmask_fieldname, bits in bits.items():
32-
annotation_fieldname = "{}_{}".format(bitmask_fieldname, "masked")
33-
# To get the correct result, i.e. an AND that all the labels are present,
34-
# we need to check that the aggregated value is equal to the bits.
35-
# If we wanted an OR (which would check for any being present),
36-
# we would have to use GREATER THAN 0 here.
37-
filters[annotation_fieldname] = bits
38-
# This ensures that the annotated value is the result of the AND operation
39-
# so if all the values are present, the result will be the same as the bits
40-
# but if any are missing, it will not be equal to the bits, but will only be
41-
# 0 if none of the bits are present.
42-
annotations[annotation_fieldname] = F(bitmask_fieldname).bitand(bits)
43-
44-
return self.annotate(**annotations).filter(**filters)
22+
return has_all_labels(self, contentnode_metadata_bitmasks, field_name, labels)
4523

4624

4725
class ContentNodeManager(
@@ -79,7 +57,7 @@ class ContentNode(base_models.ContentNode):
7957
objects = ContentNodeManager()
8058

8159

82-
for field_name in bitmask_fieldnames:
60+
for field_name in contentnode_bitmask_fieldnames:
8361
field = models.BigIntegerField(default=0, null=True, blank=True)
8462
field.contribute_to_class(ContentNode, field_name)
8563

@@ -96,6 +74,17 @@ class AssessmentMetaData(base_models.AssessmentMetaData):
9674
pass
9775

9876

77+
class ChannelMetadataQueryset(models.QuerySet):
    """QuerySet for channel metadata that can be filtered by metadata labels."""

    def has_all_labels(self, field_name, labels):
        """Filter this queryset through the shared ``has_all_labels`` helper.

        The helper is called with the channel-metadata bitmask table, the
        given ``field_name`` and the given ``labels``; its result is returned
        unchanged.
        """
        bitmask_table = channelmetadata_metadata_bitmasks
        return has_all_labels(self, bitmask_table, field_name, labels)
82+
83+
84+
class ChannelMetadataManager(models.Manager.from_queryset(ChannelMetadataQueryset)):
    """Manager built from ``ChannelMetadataQueryset`` via ``from_queryset``.

    No behaviour is added here beyond what the generated base class provides.
    """
86+
87+
9988
class ChannelMetadata(base_models.ChannelMetadata):
10089
# Note: The `categories` field should contain a _list_, NOT a _dict_.
10190

@@ -110,6 +99,13 @@ class ChannelMetadata(base_models.ChannelMetadata):
11099
categories = models.JSONField(null=True, blank=True)
111100
countries = models.ManyToManyField(Country, related_name="public_channels")
112101

102+
objects = ChannelMetadataManager()
103+
104+
105+
for field_name in channelmetadata_bitmask_fieldnames:
106+
field = models.BigIntegerField(default=0, null=True, blank=True)
107+
field.contribute_to_class(ChannelMetadata, field_name)
108+
113109

114110
class MPTTTreeIDManager(models.Model):
115111
"""

contentcuration/kolibri_public/search.py

Lines changed: 84 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,37 +26,54 @@
2626
from le_utils.constants.labels.subjects import SUBJECTSLIST
2727

2828

29-
metadata_lookup = {
29+
contentnode_metadata_lookup = {
3030
"learning_activities": LEARNINGACTIVITIESLIST,
3131
"categories": SUBJECTSLIST,
3232
"grade_levels": LEVELSLIST,
3333
"accessibility_labels": ACCESSIBILITYCATEGORIESLIST,
3434
"learner_needs": NEEDSLIST,
3535
}
36+
contentnode_metadata_bitmasks = {}
37+
contentnode_bitmask_fieldnames = {}
3638

37-
38-
metadata_bitmasks = {}
39-
40-
bitmask_fieldnames = {}
41-
42-
43-
for key, labels in metadata_lookup.items():
44-
bitmask_lookup = {}
45-
i = 0
46-
while labels[i : i + 64]:
47-
bitmask_field_name = "{}_bitmask_{}".format(key, i)
48-
bitmask_fieldnames[bitmask_field_name] = []
49-
for j, label in enumerate(labels):
50-
info = {
51-
"bitmask_field_name": bitmask_field_name,
52-
"field_name": key,
53-
"bits": 2 ** j,
54-
"label": label,
55-
}
56-
bitmask_lookup[label] = info
57-
bitmask_fieldnames[bitmask_field_name].append(info)
58-
i += 64
59-
metadata_bitmasks[key] = bitmask_lookup
39+
channelmetadata_metadata_lookup = {
40+
"categories": SUBJECTSLIST,
41+
}
42+
channelmetadata_metadata_bitmasks = {}
43+
channelmetadata_bitmask_fieldnames = {}
44+
45+
46+
def _populate_bitmask_data(
    metadata_lookup, metadata_bitmasks, bitmask_fieldnames, *, chunk_size=64
):
    """Fill the bitmask lookup tables for every metadata field.

    Each label list in ``metadata_lookup`` is cut into consecutive chunks of
    ``chunk_size`` labels. Every chunk gets its own bitmask field, named
    ``"<key>_bitmask_<offset>"`` where ``offset`` is the index of the chunk's
    first label, and each label in the chunk is assigned one bit
    (``1 << position_in_chunk``).

    Args:
        metadata_lookup: mapping of metadata field name -> sequence of labels.
        metadata_bitmasks: dict mutated in place; receives
            ``field name -> {label -> info dict}``.
        bitmask_fieldnames: dict mutated in place; receives
            ``bitmask field name -> [info dict, ...]`` in label order.
        chunk_size: number of labels stored per bitmask field (default 64).

    Each info dict has the keys ``bitmask_field_name``, ``field_name``,
    ``bits`` and ``label``; the same dict object is shared by both tables.

    Raises:
        ValueError: if ``chunk_size`` is not a positive integer.
    """
    if chunk_size < 1:
        # A non-positive width would never advance through the labels.
        raise ValueError("chunk_size must be a positive integer")

    # NOTE(review): with the default width of 64, the last label of a full
    # chunk is assigned 2 ** 63. The bitmask fields are declared as
    # BigIntegerField elsewhere - confirm that value fits the database column.
    for key, labels in metadata_lookup.items():
        bitmask_lookup = {}
        for offset in range(0, len(labels), chunk_size):
            bitmask_field_name = "{}_bitmask_{}".format(key, offset)
            chunk_infos = [
                {
                    "bitmask_field_name": bitmask_field_name,
                    "field_name": key,
                    "bits": 1 << position,
                    "label": label,
                }
                for position, label in enumerate(labels[offset : offset + chunk_size])
            ]
            bitmask_fieldnames[bitmask_field_name] = chunk_infos
            bitmask_lookup.update((info["label"], info) for info in chunk_infos)
        metadata_bitmasks[key] = bitmask_lookup
65+
66+
67+
# Build both sets of lookup tables once, at import time: first the content
# node tables, then the channel metadata tables.
_populate_bitmask_data(
    metadata_lookup=contentnode_metadata_lookup,
    metadata_bitmasks=contentnode_metadata_bitmasks,
    bitmask_fieldnames=contentnode_bitmask_fieldnames,
)
_populate_bitmask_data(
    metadata_lookup=channelmetadata_metadata_lookup,
    metadata_bitmasks=channelmetadata_metadata_bitmasks,
    bitmask_fieldnames=channelmetadata_bitmask_fieldnames,
)
6077

6178

6279
def _get_available_languages(base_queryset):
@@ -87,7 +104,7 @@ def _get_available_channels(base_queryset):
87104
# Remove the SQLite Bitwise OR definition as not needed.
88105

89106

90-
def get_available_metadata_labels(base_queryset):
107+
def get_contentnode_available_metadata_labels(base_queryset):
91108
# Updated to use the kolibri_public ChannelMetadata model
92109
from kolibri_public.models import ChannelMetadata
93110

@@ -101,12 +118,12 @@ def get_available_metadata_labels(base_queryset):
101118
if cache_key not in cache:
102119
base_queryset = base_queryset.order_by()
103120
aggregates = {}
104-
for field in bitmask_fieldnames:
121+
for field in contentnode_bitmask_fieldnames:
105122
field_agg = field + "_agg"
106123
aggregates[field_agg] = BitOr(field)
107124
output = {}
108125
agg = base_queryset.aggregate(**aggregates)
109-
for field, values in bitmask_fieldnames.items():
126+
for field, values in contentnode_bitmask_fieldnames.items():
110127
bit_value = agg[field + "_agg"]
111128
for value in values:
112129
if value["field_name"] not in output:
@@ -123,10 +140,12 @@ def get_all_contentnode_label_metadata():
123140
# Updated to use the kolibri_public ContentNode model
124141
from kolibri_public.models import ContentNode
125142

126-
return get_available_metadata_labels(ContentNode.objects.filter(available=True))
143+
return get_contentnode_available_metadata_labels(
144+
ContentNode.objects.filter(available=True)
145+
)
127146

128147

129-
def annotate_label_bitmasks(queryset):
148+
def annotate_label_bitmasks(queryset, bitmask_fieldnames):
130149
update_statements = {}
131150
for bitmask_fieldname, label_info in bitmask_fieldnames.items():
132151
update_statements[bitmask_fieldname] = sum(
@@ -142,3 +161,39 @@ def annotate_label_bitmasks(queryset):
142161
for info in label_info
143162
)
144163
queryset.update(**update_statements)
164+
165+
166+
def annotate_contentnode_label_bitmasks(queryset):
    """Run ``annotate_label_bitmasks`` on ``queryset`` with the content node
    bitmask field table and return its result."""
    return annotate_label_bitmasks(
        queryset, bitmask_fieldnames=contentnode_bitmask_fieldnames
    )
168+
169+
170+
def annotate_channelmetadata_label_bitmasks(queryset):
    """Run ``annotate_label_bitmasks`` on ``queryset`` with the channel
    metadata bitmask field table and return its result."""
    return annotate_label_bitmasks(
        queryset, bitmask_fieldnames=channelmetadata_bitmask_fieldnames
    )
172+
173+
174+
def has_all_labels(queryset, metadata_bitmasks, field_name, labels):
    """Filter ``queryset`` to rows that carry every one of ``labels``.

    Args:
        queryset: queryset to filter; must support ``annotate`` and ``filter``.
        metadata_bitmasks: mapping of metadata field name ->
            ``{label -> info dict}``, where each info dict provides
            ``"bitmask_field_name"`` and ``"bits"``.
        field_name: key into ``metadata_bitmasks`` selecting the label table.
        labels: iterable of labels that must all be present. Labels with no
            entry in the table are ignored.

    Returns:
        ``queryset`` annotated with one ``<bitmask field>_masked`` value per
        bitmask field involved, and filtered on those annotations.

    Raises:
        KeyError: if ``field_name`` is not a key of ``metadata_bitmasks``.
    """
    bitmasks = metadata_bitmasks[field_name]

    # Combine the requested labels into one mask per bitmask field. Bitwise OR
    # is used rather than addition so that a label passed more than once does
    # not carry into a neighbouring bit and corrupt the mask.
    required_bits = {}
    for label in labels:
        info = bitmasks.get(label)
        if info is None:
            continue
        bitmask_fieldname = info["bitmask_field_name"]
        required_bits[bitmask_fieldname] = (
            required_bits.get(bitmask_fieldname, 0) | info["bits"]
        )

    filters = {}
    annotations = {}
    for bitmask_fieldname, mask in required_bits.items():
        annotation_fieldname = "{}_{}".format(bitmask_fieldname, "masked")
        # The annotation is the stored value ANDed with the mask: it equals the
        # mask only when every requested bit is set, and is 0 only when none
        # of them are.
        annotations[annotation_fieldname] = F(bitmask_fieldname).bitand(mask)
        # Requiring equality with the mask gives "all labels present". An
        # "any label present" check would instead test for greater than 0.
        filters[annotation_fieldname] = mask

    return queryset.annotate(**annotations).filter(**filters)

0 commit comments

Comments
 (0)