1+ import hashlib
12import itertools
23import json
34import logging as logmodule
910import uuid
1011import zipfile
1112from copy import deepcopy
13+ from io import BytesIO
1214from itertools import chain
1315
1416from django .conf import settings
3941from le_utils .constants import file_formats
4042from le_utils .constants import format_presets
4143from le_utils .constants import roles
44+ from PIL import Image
4245from search .models import ChannelFullTextSearch
4346from search .models import ContentNodeFullTextSearch
4447from search .utils import get_fts_annotated_channel_qs
@@ -488,10 +491,11 @@ def create_perseus_exercise(ccnode, kolibrinode, exercise_data, user_id=None):
488491 logging .debug ("Creating Perseus Exercise for Node {}" .format (ccnode .title ))
489492 filename = "{0}.{ext}" .format (ccnode .title , ext = file_formats .PERSEUS )
490493 temppath = None
494+ resized_images_map = {}
491495 try :
492496 with tempfile .NamedTemporaryFile (suffix = "zip" , delete = False ) as tempf :
493497 temppath = tempf .name
494- create_perseus_zip (ccnode , exercise_data , tempf )
498+ create_perseus_zip (ccnode , exercise_data , tempf , resized_images_map )
495499 file_size = tempf .tell ()
496500 tempf .flush ()
497501
@@ -568,7 +572,7 @@ def process_assessment_metadata(ccnode, kolibrinode):
568572 return exercise_data
569573
570574
571- def create_perseus_zip (ccnode , exercise_data , write_to_path ):
575+ def create_perseus_zip (ccnode , exercise_data , write_to_path , resized_images_map ):
572576 with zipfile .ZipFile (write_to_path , "w" ) as zf :
573577 try :
574578 exercise_context = {
@@ -597,7 +601,7 @@ def create_perseus_zip(ccnode, exercise_data, write_to_path):
597601 content = content .split (exercises .GRAPHIE_DELIMITER .encode ('ascii' ))
598602 write_to_zipfile (svg_name , content [0 ], zf )
599603 write_to_zipfile (json_name , content [1 ], zf )
600- write_assessment_item (question , zf , channel_id )
604+ write_assessment_item (question , zf , channel_id , resized_images_map )
601605 except Exception as e :
602606 logging .error ("Error while publishing channel `{}`: {}" .format (channel_id , str (e )))
603607 logging .error (traceback .format_exc ())
@@ -622,7 +626,7 @@ def write_to_zipfile(filename, content, zf):
622626 zf .writestr (info , content )
623627
624628
625- def write_assessment_item (assessment_item , zf , channel_id ): # noqa C901
629+ def write_assessment_item (assessment_item , zf , channel_id , resized_images_map ): # noqa C901
626630 if assessment_item .type == exercises .MULTIPLE_SELECTION :
627631 template = 'perseus/multiple_selection.json'
628632 elif assessment_item .type == exercises .SINGLE_SELECTION or assessment_item .type == 'true_false' :
@@ -635,7 +639,7 @@ def write_assessment_item(assessment_item, zf, channel_id): # noqa C901
635639 raise TypeError ("Unrecognized question type on item {}" .format (assessment_item .assessment_id ))
636640
637641 question = process_formulas (assessment_item .question )
638- question , question_images = process_image_strings (question , zf , channel_id )
642+ question , question_images = process_image_strings (question , zf , channel_id , resized_images_map )
639643
640644 answer_data = json .loads (assessment_item .answers )
641645 for answer in answer_data :
@@ -645,14 +649,14 @@ def write_assessment_item(assessment_item, zf, channel_id): # noqa C901
645649 answer ['answer' ] = answer ['answer' ].replace (exercises .CONTENT_STORAGE_PLACEHOLDER , PERSEUS_IMG_DIR )
646650 answer ['answer' ] = process_formulas (answer ['answer' ])
647651 # In case perseus doesn't support =wxh syntax, use below code
648- answer ['answer' ], answer_images = process_image_strings (answer ['answer' ], zf , channel_id )
652+ answer ['answer' ], answer_images = process_image_strings (answer ['answer' ], zf , channel_id , resized_images_map )
649653 answer .update ({'images' : answer_images })
650654
651655 answer_data = [a for a in answer_data if a ['answer' ] or a ['answer' ] == 0 ] # Filter out empty answers, but not 0
652656 hint_data = json .loads (assessment_item .hints )
653657 for hint in hint_data :
654658 hint ['hint' ] = process_formulas (hint ['hint' ])
655- hint ['hint' ], hint_images = process_image_strings (hint ['hint' ], zf , channel_id )
659+ hint ['hint' ], hint_images = process_image_strings (hint ['hint' ], zf , channel_id , resized_images_map )
656660 hint .update ({'images' : hint_images })
657661
658662 answers_sorted = answer_data
@@ -687,7 +691,24 @@ def process_formulas(content):
687691 return content
688692
689693
690- def process_image_strings (content , zf , channel_id ):
def resize_image(image_content, width, height):
    """Resize encoded image bytes and return the re-encoded result.

    The bytes are opened with ``Image.open``, resized with the LANCZOS
    filter, and saved back using the format reported by the opened image.

    :param image_content: the encoded image as bytes.
    :param width: target width; converted with ``int()`` before resizing.
    :param height: target height; converted with ``int()`` before resizing.
    :return: the resized image as bytes, or ``None`` if anything went wrong
        while opening, resizing or saving the image.
    """
    try:
        with Image.open(BytesIO(image_content)) as img:
            # Remember the source format so the output is encoded the same way.
            original_format = img.format
            resized = img.resize((int(width), int(height)), Image.LANCZOS)
            buffered = BytesIO()
            resized.save(buffered, format=original_format)
            return buffered.getvalue()
    except Exception as e:
        # Deliberate best-effort: a failed resize is logged and reported to the
        # caller instead of aborting the whole operation.
        logging.warning(f"Error resizing image: {str(e)}")
        # Return a single None, not a (None, None) tuple: a two-element tuple
        # is truthy, so a truthiness check on the result would treat the
        # failure as success and pass the tuple on as if it were image bytes.
        return None
705+
706+
def get_resized_image_checksum(image_content):
    """Return the hexadecimal MD5 digest of ``image_content`` (bytes)."""
    digest = hashlib.md5()
    digest.update(image_content)
    return digest.hexdigest()
709+
710+
711+ def process_image_strings (content , zf , channel_id , resized_images_map ):
691712 image_list = []
692713 content = content .replace (exercises .CONTENT_STORAGE_PLACEHOLDER , PERSEUS_IMG_DIR )
693714 for match in re .finditer (r'!\[(?:[^\]]*)]\(([^\)]+)\)' , content ):
@@ -710,19 +731,61 @@ def process_image_strings(content, zf, channel_id):
710731 logging .warning ("NOTE: the following error would have been swallowed silently in production" )
711732 raise
712733
713- image_name = "images/{}.{}" .format (checksum , ext [1 :])
714- if image_name not in zf .namelist ():
715- with storage .open (ccmodels .generate_object_storage_name (checksum , filename ), 'rb' ) as imgfile :
716- write_to_zipfile (image_name , imgfile .read (), zf )
717-
718- # Add resizing data
734+ original_image_name = "images/{}.{}" .format (checksum , ext [1 :])
735+ original_img_ref = match .group (1 )
719736 if img_match .group (2 ) and img_match .group (3 ):
720- image_data = {'name' : img_match .group (1 )}
721- image_data .update ({'width' : float (img_match .group (2 ))})
722- image_data .update ({'height' : float (img_match .group (3 ))})
737+ width , height = float (img_match .group (2 )), float (img_match .group (3 ))
738+ resized_key = (original_image_name , width , height )
739+
740+ # Check if this resized version already exists
741+ new_img_ref = None
742+ if resized_key in resized_images_map :
743+ new_img_ref = resized_images_map [resized_key ]
744+ else :
745+ # Check for similar resized images with the same original name
746+ similar_image = None
747+ for key , resized_image in resized_images_map .items ():
748+ if (
749+ key [0 ] == original_image_name
750+ and abs (key [1 ] - width ) / width < 0.01
751+ and abs (key [2 ] - height ) / height < 0.01
752+ ):
753+ similar_image = resized_image
754+ break
755+
756+ if similar_image :
757+ new_img_ref = similar_image
758+ else :
759+ with storage .open (ccmodels .generate_object_storage_name (checksum , filename ), 'rb' ) as imgfile :
760+ original_content = imgfile .read ()
761+
762+ resized_content = resize_image (original_content , width , height )
763+
764+ if resized_content :
765+ resized_checksum = get_resized_image_checksum (resized_content )
766+ new_image_name = "images/{}.{}" .format (resized_checksum , ext [1 :])
767+
768+ if new_image_name not in zf .namelist ():
769+ write_to_zipfile (new_image_name , resized_content , zf )
770+ new_img_ref = original_img_ref .replace (filename , f"{ resized_checksum } { ext } " )
771+ resized_images_map [resized_key ] = new_img_ref
772+ else :
773+ logging .warning (f"Failed to resize image { filename } . Using original image." )
774+ new_img_ref = img_match .group (1 )
775+
776+ new_img_match = re .search (r'(.+/images/[^\s]+)(?:\s=([0-9\.]+)x([0-9\.]+))*' , new_img_ref )
777+ image_data = {'name' : new_img_match .group (1 )}
778+ image_data .update ({'width' : width })
779+ image_data .update ({'height' : height })
723780 image_list .append (image_data )
724- content = content .replace (match .group (1 ), img_match .group (1 ))
725-
781+ content = content .replace (original_img_ref , new_img_match .group (1 ))
782+
783+ else :
784+ if original_image_name not in zf .namelist ():
785+ with storage .open (ccmodels .generate_object_storage_name (checksum , filename ), 'rb' ) as imgfile :
786+ original_content = imgfile .read ()
787+ write_to_zipfile (original_image_name , original_content , zf )
788+ content = content .replace (match .group (1 ), img_match .group (1 ))
726789 return content , image_list
727790
728791
0 commit comments