Skip to content

Commit 6e762f5

Browse files
authored
Merge pull request #4786 from KshitijThareja/issue_4416
Add image resizing functionality for perseus file exports
2 parents d916071 + 822b78e commit 6e762f5

File tree

1 file changed

+82
-19
lines changed

1 file changed

+82
-19
lines changed

contentcuration/contentcuration/utils/publish.py

Lines changed: 82 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
import itertools
23
import json
34
import logging as logmodule
@@ -9,6 +10,7 @@
910
import uuid
1011
import zipfile
1112
from copy import deepcopy
13+
from io import BytesIO
1214
from itertools import chain
1315

1416
from django.conf import settings
@@ -39,6 +41,7 @@
3941
from le_utils.constants import file_formats
4042
from le_utils.constants import format_presets
4143
from le_utils.constants import roles
44+
from PIL import Image
4245
from search.models import ChannelFullTextSearch
4346
from search.models import ContentNodeFullTextSearch
4447
from search.utils import get_fts_annotated_channel_qs
@@ -488,10 +491,11 @@ def create_perseus_exercise(ccnode, kolibrinode, exercise_data, user_id=None):
488491
logging.debug("Creating Perseus Exercise for Node {}".format(ccnode.title))
489492
filename = "{0}.{ext}".format(ccnode.title, ext=file_formats.PERSEUS)
490493
temppath = None
494+
resized_images_map = {}
491495
try:
492496
with tempfile.NamedTemporaryFile(suffix="zip", delete=False) as tempf:
493497
temppath = tempf.name
494-
create_perseus_zip(ccnode, exercise_data, tempf)
498+
create_perseus_zip(ccnode, exercise_data, tempf, resized_images_map)
495499
file_size = tempf.tell()
496500
tempf.flush()
497501

@@ -568,7 +572,7 @@ def process_assessment_metadata(ccnode, kolibrinode):
568572
return exercise_data
569573

570574

571-
def create_perseus_zip(ccnode, exercise_data, write_to_path):
575+
def create_perseus_zip(ccnode, exercise_data, write_to_path, resized_images_map):
572576
with zipfile.ZipFile(write_to_path, "w") as zf:
573577
try:
574578
exercise_context = {
@@ -597,7 +601,7 @@ def create_perseus_zip(ccnode, exercise_data, write_to_path):
597601
content = content.split(exercises.GRAPHIE_DELIMITER.encode('ascii'))
598602
write_to_zipfile(svg_name, content[0], zf)
599603
write_to_zipfile(json_name, content[1], zf)
600-
write_assessment_item(question, zf, channel_id)
604+
write_assessment_item(question, zf, channel_id, resized_images_map)
601605
except Exception as e:
602606
logging.error("Error while publishing channel `{}`: {}".format(channel_id, str(e)))
603607
logging.error(traceback.format_exc())
@@ -622,7 +626,7 @@ def write_to_zipfile(filename, content, zf):
622626
zf.writestr(info, content)
623627

624628

625-
def write_assessment_item(assessment_item, zf, channel_id): # noqa C901
629+
def write_assessment_item(assessment_item, zf, channel_id, resized_images_map): # noqa C901
626630
if assessment_item.type == exercises.MULTIPLE_SELECTION:
627631
template = 'perseus/multiple_selection.json'
628632
elif assessment_item.type == exercises.SINGLE_SELECTION or assessment_item.type == 'true_false':
@@ -635,7 +639,7 @@ def write_assessment_item(assessment_item, zf, channel_id): # noqa C901
635639
raise TypeError("Unrecognized question type on item {}".format(assessment_item.assessment_id))
636640

637641
question = process_formulas(assessment_item.question)
638-
question, question_images = process_image_strings(question, zf, channel_id)
642+
question, question_images = process_image_strings(question, zf, channel_id, resized_images_map)
639643

640644
answer_data = json.loads(assessment_item.answers)
641645
for answer in answer_data:
@@ -645,14 +649,14 @@ def write_assessment_item(assessment_item, zf, channel_id): # noqa C901
645649
answer['answer'] = answer['answer'].replace(exercises.CONTENT_STORAGE_PLACEHOLDER, PERSEUS_IMG_DIR)
646650
answer['answer'] = process_formulas(answer['answer'])
647651
# In case perseus doesn't support =wxh syntax, use below code
648-
answer['answer'], answer_images = process_image_strings(answer['answer'], zf, channel_id)
652+
answer['answer'], answer_images = process_image_strings(answer['answer'], zf, channel_id, resized_images_map)
649653
answer.update({'images': answer_images})
650654

651655
answer_data = [a for a in answer_data if a['answer'] or a['answer'] == 0] # Filter out empty answers, but not 0
652656
hint_data = json.loads(assessment_item.hints)
653657
for hint in hint_data:
654658
hint['hint'] = process_formulas(hint['hint'])
655-
hint['hint'], hint_images = process_image_strings(hint['hint'], zf, channel_id)
659+
hint['hint'], hint_images = process_image_strings(hint['hint'], zf, channel_id, resized_images_map)
656660
hint.update({'images': hint_images})
657661

658662
answers_sorted = answer_data
@@ -687,7 +691,24 @@ def process_formulas(content):
687691
return content
688692

689693

690-
def process_image_strings(content, zf, channel_id):
694+
def resize_image(image_content, width, height):
    """
    Resize encoded image bytes to the requested dimensions.

    :param image_content: the encoded image data, as bytes
    :param width: target width; converted with int() before resizing
    :param height: target height; converted with int() before resizing
    :return: the resized image as bytes, re-encoded in the format reported
        for the source image, or None if opening, resizing or re-encoding
        the image failed for any reason
    """
    try:
        with Image.open(BytesIO(image_content)) as img:
            # Remember the source format so the output is saved in the same format
            original_format = img.format
            resized = img.resize((int(width), int(height)), Image.LANCZOS)
            buffered = BytesIO()
            resized.save(buffered, format=original_format)
            return buffered.getvalue()
    except Exception as e:
        # Deliberately best-effort: a bad image should not abort the export,
        # so log the problem and report failure to the caller instead.
        logging.warning(f"Error resizing image: {str(e)}")
        # Return a single None rather than a (None, None) tuple: a non-empty
        # tuple is truthy, so callers testing `if resized_content:` would
        # treat the failure as a successful resize.
        return None
705+
706+
707+
def get_resized_image_checksum(image_content):
    """
    Compute the MD5 checksum of image data.

    :param image_content: the image data, as bytes
    :return: the MD5 digest of the data as a hexadecimal string
    """
    digest = hashlib.md5()
    digest.update(image_content)
    return digest.hexdigest()
709+
710+
711+
def process_image_strings(content, zf, channel_id, resized_images_map):
691712
image_list = []
692713
content = content.replace(exercises.CONTENT_STORAGE_PLACEHOLDER, PERSEUS_IMG_DIR)
693714
for match in re.finditer(r'!\[(?:[^\]]*)]\(([^\)]+)\)', content):
@@ -710,19 +731,61 @@ def process_image_strings(content, zf, channel_id):
710731
logging.warning("NOTE: the following error would have been swallowed silently in production")
711732
raise
712733

713-
image_name = "images/{}.{}".format(checksum, ext[1:])
714-
if image_name not in zf.namelist():
715-
with storage.open(ccmodels.generate_object_storage_name(checksum, filename), 'rb') as imgfile:
716-
write_to_zipfile(image_name, imgfile.read(), zf)
717-
718-
# Add resizing data
734+
original_image_name = "images/{}.{}".format(checksum, ext[1:])
735+
original_img_ref = match.group(1)
719736
if img_match.group(2) and img_match.group(3):
720-
image_data = {'name': img_match.group(1)}
721-
image_data.update({'width': float(img_match.group(2))})
722-
image_data.update({'height': float(img_match.group(3))})
737+
width, height = float(img_match.group(2)), float(img_match.group(3))
738+
resized_key = (original_image_name, width, height)
739+
740+
# Check if this resized version already exists
741+
new_img_ref = None
742+
if resized_key in resized_images_map:
743+
new_img_ref = resized_images_map[resized_key]
744+
else:
745+
# Check for similar resized images with the same original name
746+
similar_image = None
747+
for key, resized_image in resized_images_map.items():
748+
if (
749+
key[0] == original_image_name
750+
and abs(key[1] - width) / width < 0.01
751+
and abs(key[2] - height) / height < 0.01
752+
):
753+
similar_image = resized_image
754+
break
755+
756+
if similar_image:
757+
new_img_ref = similar_image
758+
else:
759+
with storage.open(ccmodels.generate_object_storage_name(checksum, filename), 'rb') as imgfile:
760+
original_content = imgfile.read()
761+
762+
resized_content = resize_image(original_content, width, height)
763+
764+
if resized_content:
765+
resized_checksum = get_resized_image_checksum(resized_content)
766+
new_image_name = "images/{}.{}".format(resized_checksum, ext[1:])
767+
768+
if new_image_name not in zf.namelist():
769+
write_to_zipfile(new_image_name, resized_content, zf)
770+
new_img_ref = original_img_ref.replace(filename, f"{resized_checksum}{ext}")
771+
resized_images_map[resized_key] = new_img_ref
772+
else:
773+
logging.warning(f"Failed to resize image {filename}. Using original image.")
774+
new_img_ref = img_match.group(1)
775+
776+
new_img_match = re.search(r'(.+/images/[^\s]+)(?:\s=([0-9\.]+)x([0-9\.]+))*', new_img_ref)
777+
image_data = {'name': new_img_match.group(1)}
778+
image_data.update({'width': width})
779+
image_data.update({'height': height})
723780
image_list.append(image_data)
724-
content = content.replace(match.group(1), img_match.group(1))
725-
781+
content = content.replace(original_img_ref, new_img_match.group(1))
782+
783+
else:
784+
if original_image_name not in zf.namelist():
785+
with storage.open(ccmodels.generate_object_storage_name(checksum, filename), 'rb') as imgfile:
786+
original_content = imgfile.read()
787+
write_to_zipfile(original_image_name, original_content, zf)
788+
content = content.replace(match.group(1), img_match.group(1))
726789
return content, image_list
727790

728791

0 commit comments

Comments
 (0)