diff --git a/config.py b/config.py new file mode 100644 index 0000000..5e5109b --- /dev/null +++ b/config.py @@ -0,0 +1,15 @@ +# use database residing here +DB_LOCATION = ( + "testbox/photovoter.dblite" # Q: any allowances for this being not OUR database? +) + +DATA_LOCATION = "/tmp/123" + +# place compressed images here (needs to exist) +DEST_SHRUNK = "image/" +# move originals here (needs to exist) +DEST_ORIGINAL = "original/" + +# upload interface credentials +CRED_USERNAME = "changeme" +CRED_PASSWORD = "CHANGEME" diff --git a/main.py b/main.py index 80cccb0..d6f9221 100644 --- a/main.py +++ b/main.py @@ -1,18 +1,33 @@ -from fastapi import FastAPI +from fastapi import FastAPI, File, UploadFile, Depends, BackgroundTasks from fastapi.responses import JSONResponse +from fastapi.security import HTTPBasic, HTTPBasicCredentials from fastapi.middleware.cors import CORSMiddleware # CORS +from secrets import compare_digest from datetime import datetime from uuid import uuid4 import sqlite3 +import zipfile - -# use database residing here -DB_LOCATION = ( - "db/photovoter.dblite" # Q: any allowances for this being not OUR database? +# Global settings of this program +# ./config.py +from config import ( + DB_LOCATION, + DATA_LOCATION, + DEST_SHRUNK, + DEST_ORIGINAL, + CRED_USERNAME, + CRED_PASSWORD, ) + +# our own util for photo upload and processing +from util import import_photos as iph + +# Initialization logic app = FastAPI() +security = HTTPBasic() +iph.check_database(database_path=DB_LOCATION) con = sqlite3.connect(DB_LOCATION) con.row_factory = sqlite3.Row cur = con.cursor() # NB! single is enough for now, we might require multiple later @@ -172,3 +187,89 @@ async def photo_points(): } for point in points ] + + +@app.post( + "/upload_pictures", + responses={ + 202: {"description": "Archive accepted into processing"}, + 401: {"description": "Authentication is required to access this resource"}, + 415: {"description": "Cannot process uploaded archive"}, + }, +) +async def upload_pictures( + background_tasks: BackgroundTasks, + credentials: HTTPBasicCredentials = Depends(security), + file: UploadFile = File(...), +): + """Photo upload endpoint""" + """ + Accepts photo in zip archives with any internal directory structure + Valid uploads yield 202 status message and process photos in the background + Non-zip archives yeild 415 error + Upload is restricted by basic HTTP login, configurable in config.py + """ + # check authenticity + correct_username = compare_digest(credentials.username, CRED_USERNAME) + correct_password = compare_digest(credentials.password, CRED_PASSWORD) + if not (correct_username and correct_password): + return JSONResponse(status_code=401) + # slurp the zip + if not zipfile.is_zipfile(file.file): + return JSONResponse(status_code=415) + # detach from the interface + # unpack zip + tasks = BackgroundTasks() + tasks.add_task( + unpack_pictures_zip, + file=file, + time=datetime.utcnow().replace(microsecond=0), + ) + + # feed the pictures to util/import_photos.py + return JSONResponse("Accepted", status_code=202, background=tasks) + + +def unpack_pictures_zip(file: UploadFile, time): + """ + Unpack and process zip archived photo + Extract pictures in the DATA_LOCATION/processing + and feed them to util/import_photos.py + Walk the nested DATA_LOCATION/processing ourselves + Uses: DB_LOCATION, DATA_LOCATION + """ + # we only call this function sporadically, so import here + import os + from shutil import rmtree + + print(f"Accepted {file.filename} at {time.isoformat()} into processing") + processing_path = os.path.join(DATA_LOCATION, "processing" + str(time.timestamp())) + os.makedirs(processing_path, exist_ok=True) + + # using private ._file field is a dirty hack, but + # SpooledTemporaryFile does not implement seekable + # required by zipfile 'r' mode + # https://bugs.python.org/issue26175 + with zipfile.ZipFile(file.file._file) as photo_zip: + problem_files = photo_zip.testzip() + if problem_files is not None: + print( + f"Errors in {file.filename} from {time.isoformat()} at {problem_files}" + ) + photo_zip.extractall(path=processing_path) + photo_zip.close() + + print(f"Start processing {file.filename} from {time.isoformat()}") + + iph.check_database(database_path=DB_LOCATION) + for (dir, _, _) in os.walk(processing_path): + iph.run( + db_location=DB_LOCATION, + source=os.path.join(dir), + dest_shrunk=os.path.join(DATA_LOCATION, os.path.normcase(DEST_SHRUNK)), + dest_original=os.path.join(DATA_LOCATION, os.path.normcase(DEST_ORIGINAL)), + ) + + rmtree(processing_path) + + print(f"Succesfully processed {file.filename} from {time.isoformat()}") diff --git a/requirements.txt b/requirements.txt index 58ce903..8c2bdf8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,9 @@ pathspec==0.8.1 pycodestyle==2.7.0 pydantic==1.8.2 pyflakes==2.3.1 +python-multipart==0.0.5 regex==2021.4.4 +six==1.16.0 starlette==0.14.2 toml==0.10.2 typing-extensions==3.10.0.0 diff --git a/util/import_photos.py b/util/import_photos.py index 906430d..4e80210 100644 --- a/util/import_photos.py +++ b/util/import_photos.py @@ -8,32 +8,17 @@ from sys import argv, stderr from shutil import move import sqlite3 -# update database residing here -DB_LOCATION = ( - "db/photovoter.dblite" # Q: any allowances for this being not OUR database? -) -# place compressed images here (needs to exist) -DEST_SHRUNK = "db/image/" -# move originals here (needs to exist) -DEST_ORIGINAL = "db/original/" - -def usage(): - """Brief usage explanation""" - print("USAGE: python {name} /path/to/images".format(name=argv[0]), file=stderr) - - -def process_pictures(): +def process_pictures(source: str, dest_shrunk: str, dest_original: str): """Process images from the base directory in the first command line argument. - Place the resized copies to DEST_SHRUNK and - move the originals to DEST_ORIGINAL. + Place the resized copies to dest_shrunk and + move the originals to dest_original. Return a dict for each image processed for database collection. - Uses: DEST_SHRUNK, DEST_ORIGINAL """ # walk every pic # We only care about files in the root of the path # Ignore any nested directories - (root, _, filenames) = next(walk(argv[1], topdown=True), (None, None, [])) + (root, _, filenames) = next(walk(source, topdown=True), (None, None, [])) for filename in filenames: # FIXME[0]:what if picture with the same name already exists? # skip any non-image files @@ -50,34 +35,37 @@ def process_pictures(): cloned.strip() # Q: may damage icc, do we allow that or use smh else? cloned.transform(resize="50%") # Q: what do we want here? # move them to the processed folder - cloned.save(filename=path.join(DEST_SHRUNK, filename)) + cloned.save(filename=path.join(dest_shrunk, filename)) # move the originals out of the working directory # Q: do we strip exif from originals? - move(path.join(root, filename), DEST_ORIGINAL) - - # return the freshly processed picture info - yield { - "ResizedImage": path.join(DEST_SHRUNK, filename), - "OriginalImage": path.join(DEST_ORIGINAL, filename), - "DateTimeOriginal": exif["DateTimeOriginal"], # Q: normalize it? - "GPSLatitude": exif["GPSLatitude"], - "GPSLatitudeRef": exif["GPSLatitudeRef"], - "GPSLongitude": exif["GPSLongitude"], - "GPSLongitudeRef": exif["GPSLongitudeRef"], - } + move(path.join(root, filename), dest_original) + + try: + # return the freshly processed picture info + yield { + "ResizedImage": path.join(dest_shrunk, filename), + "OriginalImage": path.join(dest_original, filename), + "DateTimeOriginal": exif["DateTimeOriginal"], # Q: normalize it? + "GPSLatitude": exif["GPSLatitude"], + "GPSLatitudeRef": exif["GPSLatitudeRef"], + "GPSLongitude": exif["GPSLongitude"], + "GPSLongitudeRef": exif["GPSLongitudeRef"], + } + except KeyError as e: + print(f"Image '{filename}' has no valid exif") + continue -def update_database(pic_info: dict): +def update_database(pic_info: dict, db_location: str): """Append new image information to the existing database or create a new one, if it does not exist yet - Uses: DB_LOCATION """ # make sure the database exists - check_database(DB_LOCATION) + check_database(db_location) # FIXME[1]: closure it, so we open it only once? - con = sqlite3.connect(DB_LOCATION) + con = sqlite3.connect(db_location) cur = con.cursor() # insert new pictures to the image table cur.execute( @@ -114,7 +102,7 @@ def check_database(database_path: str): return # make one else: - print("No DB, creating", database_path) + print("No DB, creating", path.abspath(database_path)) con = sqlite3.connect(database_path) cur = con.cursor() @@ -169,23 +157,46 @@ def check_database(database_path: str): con.close() -def main(): - if len(argv) != 2: - usage() - exit(1) - +def run(db_location: str, source: str, dest_shrunk: str, dest_original: str): + """Core program logic""" pics_processed = 0 # process each pic and add it to the database - for pic in process_pictures(): - update_database(pic) + for pic in process_pictures(source, dest_shrunk, dest_original): + update_database(pic, db_location) pics_processed += 1 if pics_processed == 0: - print("No more pictures processed from", argv[1]) + print("No pictures processed from", source) print("Do we have enough permissions?") else: print("Pictures processed:", pics_processed) +def usage(): + """Brief usage explanation""" + print("USAGE: python {name} /path/to/images".format(name=argv[0]), file=stderr) + + +def main(): + if len(argv) != 2: + usage() + exit(1) + + import sys + import os + + # append root directory to sys.path + # to allow import globals from ../config.py + sys.path.append(os.path.dirname(__file__) + "/..") + import config as cfg + + run( + cfg.DB_LOCATION, + argv[1], + path.normcase(cfg.DEST_SHRUNK), + path.normcase(cfg.DEST_ORIGINAL), + ) + + if __name__ == "__main__": main()