Skip to content

Commit 0e3c0b6

Browse files
authored
Export an app store archive, and expose it through the dev portal API. (#56)
As described in https://rebble.io/2025/11/24/rebble-in-your-own-world.html
1 parent 21d941f commit 0e3c0b6

File tree

7 files changed

+320
-11
lines changed

7 files changed

+320
-11
lines changed

appstore/ARCHIVE_LICENSE

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
This archive was provided to you by the folks at the Rebble Foundation
2+
(https://rebble.io/)!
3+
4+
It would be EXTREMELY COOL if, anywhere you use data derived from this
5+
collection, you prominently cited Rebble as the source of these data.
6+
7+
It would be EXTREMELY UNCOOL if you did not do that.
8+
9+
Remember that apps are the intellectual property of the original authors.
10+
This archive does not, in and of itself, grant you any rights to modify or
11+
further redistribute the contents (unless otherwise noted in the metadata
12+
for any given app).

appstore/commands.py

Lines changed: 192 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import datetime
2+
import functools
23
import hashlib
4+
import io
35
import json
6+
from queue import *
7+
from threading import Thread, Lock
8+
import tempfile
49
import yaml
510

611
import flask.json
@@ -19,9 +24,9 @@
1924
from algoliasearch import algoliasearch
2025

2126
from .utils import id_generator, algolia_app
22-
from .models import Category, db, App, Developer, Release, CompanionApp, Binary, AssetCollection, LockerEntry, UserLike
27+
from .models import Category, db, App, Developer, Release, CompanionApp, Binary, AssetCollection, LockerEntry, UserLike, Collection, AvailableArchive
2328
from .pbw import PBW, release_from_pbw
24-
from .s3 import upload_pbw, upload_asset
29+
from .s3 import upload_pbw, upload_asset, download_pbw, download_asset, upload_archive
2530
from .settings import config
2631

2732
if config['ALGOLIA_ADMIN_API_KEY']:
@@ -578,6 +583,191 @@ def path(base):
578583
else:
579584
algolia_index.delete_objects([app_obj.id])
580585

586+
def export_archive_to_zip(fn, test_only=False, n_threads=20):
    """Write a zip archive of the visible app store contents to ``fn``.

    The archive contains:

    * ``metadata/apps.json``, ``metadata/categories.json``,
      ``metadata/collections.json`` and ``metadata/developers.json``;
    * ``assets/<c0>/<c1>/<id>`` for every asset id collected while building
      the app metadata (fetched with ``download_asset``);
    * ``binaries/<c0>/<c1>/<id>.pbw`` for every published release
      (fetched with ``download_pbw``);
    * ``metadata/failed_downloads.json``, mapping archive path to the
      ``repr`` of the exception for every download that did not succeed;
    * ``LICENSE.txt``, a copy of the ``ARCHIVE_LICENSE`` file that sits next
      to this module.

    Args:
        fn: Passed straight to ``zipfile.ZipFile`` -- a path or a writable,
            seekable file object.
        test_only: When true, downloads stop being attempted after roughly
            the first 50 files; the rest are recorded in
            ``failed_downloads.json`` as ``TimeoutError()``. This lets the
            export run without a fast connection to the object store.
        n_threads: Number of concurrent downloader threads (at least one
            thread is always started).
    """
    assets = set()
    binaries = set()

    # Progress counters for the app metadata pass.
    apps_seen = 0
    apps_total = 0

    def _mk_release(rel):
        # Remember the release id so that its .pbw is downloaded later.
        binaries.add(rel.id)

        return {
            'has_pbw': rel.has_pbw,
            'binaries': { plat: {
                'sdk_major': b.sdk_major,
                'sdk_minor': b.sdk_minor,
                'process_info_flags': b.process_info_flags,
                'icon_resource_id': b.icon_resource_id,
            } for plat,b in rel.binaries.items() },
            'capabilities': rel.capabilities,
            'js_md5': rel.js_md5,
            'published_date': rel.published_date.timestamp() if rel.published_date else None,
            'release_notes': rel.release_notes,
            'version': rel.version,
            'compatibility': rel.compatibility,
        }

    def _mk_asset_collection(ass):
        # NOTE(review): screenshots are listed in the metadata below but are
        # not added to `assets`, so they are never downloaded into the
        # archive -- confirm whether that is intentional.
        assets.update(ass.headers)
        assets.add(ass.banner)

        return {
            'description': ass.description,
            'screenshots': ass.screenshots,
            'headers': ass.headers,
            'banner': ass.banner,
        }

    def _mk_app(app):
        nonlocal apps_seen
        if apps_seen % 1000 == 0:
            print(f"... {apps_seen} / {apps_total} ...")
        apps_seen += 1
        assets.add(app.icon_large)
        assets.add(app.icon_small)
        return {
            'uuid': app.app_uuid,
            'asset_collections': { plat: _mk_asset_collection(ass) for plat,ass in app.asset_collections.items() },
            'category_id': app.category_id,
            # NOTE(review): companion icons are exported by reference only;
            # they are not added to `assets` -- confirm this is intended.
            'companion_apps': { plat:
                {
                    'icon': c.icon,
                    'url': c.url,
                    'name': c.name,
                    'pebblekit3': c.pebblekit3,
                } for plat,c in app.companions.items() },
            'collection_ids': [ c.id for c in app.collections ],
            'created_at': app.created_at.timestamp() if app.created_at else None,
            'developer_id': app.developer_id,
            'hearts': app.hearts,
            'releases': { rel.id: _mk_release(rel) for rel in app.releases if rel.is_published },
            'icon_large': app.icon_large,
            'icon_small': app.icon_small,
            'published_date': app.published_date.timestamp() if app.published_date else None,
            'source': app.source,
            'title': app.title,
            'timeline_enabled': app.timeline_enabled,
            'type': app.type,
            'website': app.website,
        }

    def _write_json(zf, arcname, payload):
        # Serialise to a string and hand the complete document to the
        # archive in one call, rather than writing through a temporary text
        # wrapper whose buffered data is only flushed when it is finalised.
        zf.writestr(arcname, json.dumps(payload))

    with zipfile.ZipFile(fn, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
        print(f"Querying apps...")
        visible_apps = App.query.filter(App.visible == True)
        apps_total = visible_apps.count()
        # Building the metadata also fills `assets` and `binaries`.
        apps_meta = { app.id: _mk_app(app) for app in visible_apps.yield_per(1000) }
        print(f"Exporting apps.json...")
        _write_json(zf, "metadata/apps.json", apps_meta)

        print(f"Exporting categories.json...")
        categories = {
            c.id: {
                'name': c.name,
                'slug': c.slug,
                'colour': c.colour,
                'icon': c.icon,
                'app_type': c.app_type,
                'banner_apps': [ app.id for app in c.banner_apps],
            } for c in Category.query.filter(Category.is_visible == True)
        }
        _write_json(zf, "metadata/categories.json", categories)

        print(f"Exporting collections.json...")
        collections = {
            c.id: {
                'name': c.name,
                'slug': c.slug,
                'app_type': c.app_type,
                'platforms': c.platforms,
            } for c in Collection.query
        }
        _write_json(zf, "metadata/collections.json", collections)

        print(f"Exporting developers.json...")
        developers = {
            d.id: d.name
            for d in Developer.query
        }

        # XXX: at some point it might be nice to also provide a Rebble
        # user ID for a developer?  though I guess also developers who
        # want to provide this to a user of the archive to verify
        # themselves could just as well give a user oauth creds to
        # verify their developer_id
        _write_json(zf, "metadata/developers.json", developers)

        # Work queue of (path inside the archive, callable that downloads
        # the object into a writable file object).
        zip_targets = Queue()
        downloads_failed = {}

        for ass in assets:
            if not ass:
                # Skip missing ('' or None) asset references.
                continue
            zip_targets.put((f"assets/{ass[0]}/{ass[1]}/{ass}", functools.partial(download_asset, ass)))

        for pbw in binaries:
            if not pbw:
                continue
            zip_targets.put((f"binaries/{pbw[0]}/{pbw[1]}/{pbw}.pbw", functools.partial(download_pbw, pbw)))

        n = 0
        # ZipFile only allows one open writing handle at a time, so entry
        # writes are serialised; downloads themselves run concurrently.
        zip_lock = Lock()
        # Guards the shared progress counter `n`.
        progress_lock = Lock()

        def download_thread():
            nonlocal n
            while True:
                try:
                    fname, download = zip_targets.get_nowait()
                except Empty:
                    return

                try:
                    try:
                        if test_only and n > 50:
                            raise TimeoutError()
                        with io.BytesIO() as buf:
                            download(buf)
                            # The view is released before `buf` is closed.
                            with buf.getbuffer() as view, zip_lock, zf.open(fname, 'w') as zff:
                                zff.write(view)
                    except Exception as e:
                        # Best effort: record the failure and carry on.
                        downloads_failed[fname] = repr(e)

                    with progress_lock:
                        done = n
                        n += 1
                    if (done % 100) == 0:
                        print(f"... {done} done, {zip_targets.qsize()} to go ...")
                finally:
                    # Always acknowledge the item, otherwise
                    # zip_targets.join() below would block forever.
                    zip_targets.task_done()

        workers = [Thread(target=download_thread) for _ in range(max(1, n_threads))]
        for worker in workers:
            worker.start()
        zip_targets.join()
        for worker in workers:
            worker.join()

        _write_json(zf, "metadata/failed_downloads.json", downloads_failed)

        with open(f"{os.path.dirname(__file__)}/ARCHIVE_LICENSE", "rb") as rf, zf.open("LICENSE.txt", "w") as wf:
            wf.write(rf.read())
753+
754+
@apps.command('export-archive')
@click.option('--upload', is_flag=True)
@click.option('--output')
@click.option('--test', is_flag=True) # test mode: only roughly the first 50 downloads are attempted
def export_archive(output, upload, test):
    """Export the app store archive as a zip file, optionally uploading it."""
    # --output: path of the zip to write; when omitted, an anonymous
    #           temporary file is used (only useful together with --upload).
    # --upload: upload the archive as appstore-archive-YYYYMM.zip and record
    #           it in the available_archives table.
    # --test:   passed to export_archive_to_zip as test_only.
    print(f"Preparing to export archive...")
    scratch = None
    if not output:
        scratch = output = tempfile.TemporaryFile()
    try:
        export_archive_to_zip(output, test_only=test)
        if upload:
            now = datetime.datetime.now()
            filename = f"appstore-archive-{now.year:04d}{now.month:02d}.zip"
            print(f"uploading to {filename}")
            upload_archive(filename, output)
            db.session.add(AvailableArchive(filename=filename, created_at=datetime.datetime.now()))
            db.session.commit()
    finally:
        # Release the scratch file even when the export or upload fails.
        if scratch is not None:
            scratch.close()
770+
581771

582772
def init_app(app):
583773
app.cli.add_command(apps)

appstore/developer_portal_api.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import secrets
44

55
from algoliasearch import algoliasearch
6-
from flask import Blueprint, jsonify, abort, request
6+
from flask import Blueprint, jsonify, abort, request, redirect
77
from flask_cors import CORS
88

99
from sqlalchemy.exc import IntegrityError
@@ -12,10 +12,10 @@
1212
from sqlalchemy.exc import DataError
1313
from zipfile import BadZipFile
1414

15-
from .utils import demand_authed_request, id_generator, validate_new_app_fields, is_valid_category, is_valid_appinfo, is_valid_platform, clone_asset_collection_without_images, is_valid_image_file, is_valid_image_size, get_max_image_dimensions, is_users_developer_id, user_is_wizard, newAppValidationException, algolia_app, first_version_is_newer
16-
from .models import db, App, Developer, Release, AssetCollection
15+
from .utils import demand_authed_request, id_generator, validate_new_app_fields, is_valid_category, is_valid_appinfo, is_valid_platform, clone_asset_collection_without_images, is_valid_image_file, is_valid_image_size, get_max_image_dimensions, is_users_developer_id, user_is_wizard, newAppValidationException, algolia_app, first_version_is_newer, get_uid
16+
from .models import db, App, Developer, Release, AssetCollection, AvailableArchive
1717
from .pbw import PBW, release_from_pbw
18-
from .s3 import upload_pbw, upload_asset
18+
from .s3 import upload_pbw, upload_asset, get_link_for_archive
1919
from .settings import config
2020
from .discord import announce_release, announce_new_app, audit_log
2121

@@ -768,6 +768,13 @@ def wizard_get_s3_assets(app_id):
768768
return jsonify(images = images, pbws = pbws)
769769

770770

771+
@devportal_api.route('/archive/latest', methods=['GET'])
def download_archive():
    """Redirect the caller to a presigned download link for the newest archive.

    Responds with 404 when no archive has been registered yet.
    """
    # The return value is not needed; per the original author's note this
    # call performs authentication, so that AI scrapers don't waste all our
    # bandwidth.
    get_uid()

    archive = AvailableArchive.query.order_by(AvailableArchive.created_at.desc()).first()
    if archive is None:
        # No archive rows yet: report "not found" instead of letting an
        # unhandled NoResultFound turn into a 500.
        abort(404)
    return redirect(get_link_for_archive(archive.filename))
777+
771778
def init_app(app, url_prefix='/api/dp'):
772779
global parent_app
773780
parent_app = app

appstore/models.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ class App(db.Model):
6060
collections = db.relationship('Collection',
6161
back_populates='apps',
6262
secondary=collection_apps,
63-
passive_deletes=True)
63+
passive_deletes=True,
64+
lazy='selectin')
6465
created_at = db.Column(db.DateTime)
6566
developer_id = db.Column(db.String(24), db.ForeignKey('developers.id'))
6667
developer = db.relationship('Developer', lazy='joined')
@@ -178,6 +179,15 @@ class UserFlag(db.Model):
178179
app = db.relationship('App')
179180
db.Index('user_flag_app_user_index', UserFlag.app_id, UserFlag.user_id, unique=True)
180181

182+
class AvailableArchive(db.Model):
    """
    Archives in S3 of the appstore database.

    One row is added by the `export-archive` command each time it uploads
    an archive; the developer portal API serves the row with the most
    recent `created_at`.
    """
    __tablename__ = "available_archives"
    # Integer primary key.
    id = db.Column(db.Integer(), primary_key=True, index=True)
    # When the archive was registered; indexed, and used to pick the latest one.
    created_at = db.Column(db.DateTime, index=True)
    # Archive file name; the S3 archive path prefix is prepended by the s3 helpers.
    filename = db.Column(db.String)
190+
181191
def init_app(app):
182192
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
183193
db.init_app(app)

appstore/s3.py

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
import json
22
import boto3
3+
import threading
4+
from botocore.exceptions import ClientError
5+
from .models import Binary
36
from .settings import config
47
from .utils import id_generator
58

69
# Try to find a way to get S3 credentials.
710
session = None
811
s3_endpoint = None
12+
session_lock = threading.Lock()
913

1014
# Try loading creds from the environment.
1115
try:
@@ -45,19 +49,39 @@
4549
if not session:
4650
print("no session")
4751

52+
_clients = {}
53+
54+
def _client_for_endpoint(endpoint):
    """Return the S3 client for the calling thread and ``endpoint``.

    Clients are cached in ``_clients`` under the key
    ``(current thread, endpoint)`` and created on first use.
    """
    cache_key = (threading.current_thread(), endpoint)
    try:
        return _clients[cache_key]
    except KeyError:
        pass

    # Client construction is serialised on session_lock -- presumably
    # because the shared boto3 session is not safe for concurrent use.
    with session_lock:
        client = session.client('s3', endpoint_url=endpoint)
        _clients[cache_key] = client
    return client
62+
4863
def upload_pbw(release, file):
    """Upload the PBW for ``release`` to ``<S3_PATH><release.id>.pbw``.

    ``file`` is either a filesystem path (``str``) or a file object; a file
    object is rewound to the start and uploaded with the
    ``application/zip`` content type.
    """
    filename = f"{config['S3_PATH']}{release.id}.pbw"

    if not isinstance(file, str):
        print(f"uploading file object {file.name} to {config['S3_BUCKET']}:{filename}")
        client = _client_for_endpoint(s3_endpoint)
        file.seek(0)
        client.upload_fileobj(
            file,
            config['S3_BUCKET'],
            filename,
            ExtraArgs={'ContentType': 'application/zip'},
        )
        return

    print(f"uploading file {file} to {config['S3_BUCKET']}:{filename}")
    client = _client_for_endpoint(s3_endpoint)
    client.upload_file(file, config['S3_BUCKET'], filename)
6075

76+
def download_pbw(id, file):
    """Download the PBW stored as ``<S3_PATH><id>.pbw`` from the S3 bucket.

    ``file`` is either a destination path (``str``) or a writable file
    object that receives the bytes.
    """
    key = f"{config['S3_PATH']}{id}.pbw"
    client = _client_for_endpoint(s3_endpoint)
    # Paths and file objects go through different boto3 calls.
    fetch = client.download_file if isinstance(file, str) else client.download_fileobj
    fetch(config['S3_BUCKET'], key, file)
83+
84+
6185
def upload_asset(file, mime_type = None):
6286
id = id_generator.generate()
6387
filename = f"{config['S3_ASSET_PATH']}{id}"
@@ -74,14 +98,41 @@ def upload_asset(file, mime_type = None):
7498
else:
7599
raise Exception("Unknown or unsupported mime_type for file provided to update_asset")
76100

77-
s3 = session.client('s3', endpoint_url=s3_endpoint)
101+
s3 = _client_for_endpoint(s3_endpoint)
78102
s3.upload_file(file, config['S3_ASSET_BUCKET'], filename, ExtraArgs = {'ContentType': mime_type})
79103
return id
80104

81105
else:
82106
print(f"uploading file object '{file.name}' to {config['S3_ASSET_BUCKET']}:{filename}")
83107
file.seek(0)
84-
s3 = session.client('s3', endpoint_url=s3_endpoint)
108+
s3 = _client_for_endpoint(s3_endpoint)
85109
s3.upload_fileobj(file, config['S3_ASSET_BUCKET'], filename, ExtraArgs = {'ContentType': mime_type})
86110

87111
return id
112+
113+
def download_asset(id, file):
    """Download the asset stored as ``<S3_ASSET_PATH><id>`` from the asset bucket.

    ``file`` is either a destination path (``str``) or a writable file
    object that receives the bytes.
    """
    key = f"{config['S3_ASSET_PATH']}{id}"
    client = _client_for_endpoint(s3_endpoint)
    # Paths and file objects go through different boto3 calls.
    fetch = client.download_file if isinstance(file, str) else client.download_fileobj
    fetch(config['S3_ASSET_BUCKET'], key, file)
120+
121+
def upload_archive(filename, file, mime_type = 'application/zip'):
    """Upload an archive to ``<S3_ARCHIVE_PATH><filename>`` in the archive bucket.

    ``file`` is either a filesystem path (``str``) or a file object; a file
    object is rewound to the start before it is uploaded. ``mime_type`` is
    stored as the object's content type.
    """
    key = f"{config['S3_ARCHIVE_PATH']}{filename}"
    client = _client_for_endpoint(s3_endpoint)
    extra_args = { 'ContentType': mime_type }

    if not isinstance(file, str):
        file.seek(0)
        client.upload_fileobj(file, config['S3_ARCHIVE_BUCKET'], key, ExtraArgs = extra_args)
        return

    client.upload_file(file, config['S3_ARCHIVE_BUCKET'], key, ExtraArgs = extra_args)
129+
130+
def get_link_for_archive(filename, expiry = 3600):
    """Return a presigned ``get_object`` URL for an uploaded archive.

    The object key is ``<S3_ARCHIVE_PATH><filename>`` in the archive bucket;
    ``expiry`` is passed to boto3 as ``ExpiresIn``.
    """
    object_ref = {
        'Bucket': config['S3_ARCHIVE_BUCKET'],
        'Key': f"{config['S3_ARCHIVE_PATH']}{filename}",
    }
    client = _client_for_endpoint(s3_endpoint)
    return client.generate_presigned_url('get_object', Params=object_ref, ExpiresIn=expiry)

0 commit comments

Comments
 (0)