diff --git a/README.md b/README.md index 4f681d1..2ac4375 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,54 @@ uv run ai-migrate merge-branches uv run ai-migrate migrate --help ``` +### S3 Upload Support + +The tool can automatically upload migration results and logs to S3. Files are organized by: +- Project +- Timestamp +- Result type (pass/fail) +- Logs + +To enable S3 uploads: + +1. Configure AWS credentials using any standard method: +```bash +# Environment variables +export AWS_ACCESS_KEY_ID="your_access_key" +export AWS_SECRET_ACCESS_KEY="your_secret_key" +export AWS_DEFAULT_REGION="us-west-2" + +# Or use AWS credentials file (~/.aws/credentials) +# Or use IAM roles if running on AWS infrastructure +``` + +2. Specify the S3 bucket in one of two ways: +```bash +# Command line argument +uv run ai-migrate migrate --s3-bucket my-bucket path/to/files + +# Or environment variable +export AI_MIGRATE_S3_BUCKET="my-bucket" +uv run ai-migrate migrate path/to/files +``` + +The files will be uploaded with this structure: +``` +bucket/ + project-name/ + attempt-20250326-184018/ + manifest/manifest-20250326-184018.json + pass/ + file1.java + file2.java + fail/ + file3.java +``` + +#### Note + If you set the bucket name to `localhost`, nothing is uploaded to S3; the results are saved locally under `~/ai-migration-results` instead. + + ### Project Selection The interactive CLI now provides an easy way to select which migration project to use: @@ -150,6 +198,7 @@ Here's how commands from the main branch map to the new interactive CLI: | `uv run ai-migrate projects run --rerun-passed` | `uv run ai-migrate migrate --rerun-passed` | Re-run migrations that have already passed | | `uv run ai-migrate projects run --max-workers=` | `uv run ai-migrate migrate --max-workers=` | Set maximum number of parallel workers | | `uv run ai-migrate projects run --local-worktrees` | `uv run ai-migrate migrate --local-worktrees` | Create worktrees alongside the git repo | +| `uv run ai-migrate projects run --s3-bucket=` | `uv run ai-migrate 
migrate --s3-bucket=` | Upload results to S3 bucket | | `uv run ai-migrate projects checkout ` | `uv run ai-migrate checkout ` | Check out the branch for a failed migration | | `uv run ai-migrate projects merge-branches` | `uv run ai-migrate merge-branches` | Merge changes from migrator branches | diff --git a/pyproject.toml b/pyproject.toml index 04596a2..294386f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "rich>=13.0.0", "prompt_toolkit>=3.0.0", "tiktoken>=0.9.0", + "boto3>=1.34.0", ] [project.scripts] diff --git a/src/ai_migrate/cli.py b/src/ai_migrate/cli.py index a96d27e..8d3722a 100644 --- a/src/ai_migrate/cli.py +++ b/src/ai_migrate/cli.py @@ -716,6 +716,11 @@ def project_dir_validate(ctx, param, project_dir): is_flag=True, help="Don't automatically create evaluations after successful migrations", ) +@click.option( + "--s3-bucket", + help="S3 bucket name for storing migration results (can also use AI_MIGRATE_S3_BUCKET env var). Pass 'localhost' to save locally to '~/ai-migration-results'", + envvar="AI_MIGRATE_S3_BUCKET", +) def migrate( file_paths, project_dir, @@ -726,6 +731,7 @@ def migrate( local_worktrees, llm_fakes, dont_create_evals, + s3_bucket, ): """Migrate one or more files or manage project resources. 
@@ -788,6 +794,7 @@ def migrate( resume=True, llm_fakes=llm_fakes, dont_create_evals=dont_create_evals, + s3_bucket=s3_bucket, ) ) diff --git a/src/ai_migrate/projects.py b/src/ai_migrate/projects.py index b85094b..5d970fb 100644 --- a/src/ai_migrate/projects.py +++ b/src/ai_migrate/projects.py @@ -69,6 +69,7 @@ async def run( resume: bool = True, llm_fakes=None, dont_create_evals: bool = False, + s3_bucket: str | None = None, ) -> list[FileGroup]: """Run an AI migration project.""" if manifest_file: @@ -179,14 +180,14 @@ async def process_one_with_sem(index, files: FileGroup, task_name: str): for fn in file.files: print(fn) - ts = datetime.now().strftime("%Y%m%d-%H%M%S") - results_file = f"manifest-{ts}.json" + timestamp = datetime.now() + results_file = f"manifest-{timestamp.strftime('%Y%m%d-%H%M%S')}.json" with open(results_file, "w") as f: result_manifest = manifest.model_copy( update={ "files": results, "eval_target_repo_ref": target_sha, - "time": datetime.now(), + "time": timestamp, } ) f.write( @@ -196,6 +197,19 @@ async def process_one_with_sem(index, files: FileGroup, task_name: str): ) print(f"Results saved to {results_file}") + + if s3_bucket: + from .s3_uploader import S3Uploader + + uploader = S3Uploader(s3_bucket) + print("Results upload started") + await uploader.upload_results( + project=Path(project_dir).name, + results=results, + results_file=Path(results_file), + timestamp=timestamp, + ) + return results diff --git a/src/ai_migrate/s3_uploader.py b/src/ai_migrate/s3_uploader.py new file mode 100644 index 0000000..0ccc307 --- /dev/null +++ b/src/ai_migrate/s3_uploader.py @@ -0,0 +1,62 @@ +from datetime import datetime +from pathlib import Path +import boto3 +import asyncio +import shutil +import logging + +logger = logging.getLogger("ai_migrate.s3_uploader") + + +class S3Uploader: + def __init__(self, bucket_name: str): + self.s3_client = boto3.client("s3") if bucket_name != "localhost" else None + self.bucket = bucket_name + + async def upload( 
+ self, project: str, paths: list[Path], result_type: str, timestamp: datetime + ) -> None: + dest = Path(f"{project}/attempt-{timestamp.strftime('%Y%m%d-%H%M%S')}") + if result_type: + dest = dest / result_type + + if self.bucket == "localhost": + dest = Path("~/ai-migration-results").expanduser() / dest + dest.mkdir(parents=True, exist_ok=True) + for path in paths: + try: + shutil.copy2(path, dest / path.name) + except OSError as e: + logger.error(f"Failed to save {path.name}: {e}") + return + + semaphore = asyncio.Semaphore(5) + + async def upload_one(path: Path): + key = str(dest / path.name) + try: + async with semaphore: + await asyncio.to_thread( + self.s3_client.put_object, + Bucket=self.bucket, + Key=key, + Body=path.read_bytes(), + ) + except Exception as e: + logger.error(f"Failed to upload {path.name} to {self.bucket}: {e}") + + await asyncio.gather(*[upload_one(path) for path in paths]) + + async def upload_results( + self, + project: str, + results: list, + results_file: Path, + timestamp: datetime = None, + ) -> None: + timestamp = timestamp or datetime.now() + await self.upload(project, [results_file], "manifest", timestamp) + for result in results: + await self.upload( + project, [Path(f) for f in result.files], result.result, timestamp + )