From d1066632c8250876a54a8ab55d35cdaacce8e3f9 Mon Sep 17 00:00:00 2001 From: Nicola Stoira <nicola.stoira@accenture.com> Date: Tue, 10 Dec 2024 10:37:45 +0100 Subject: [PATCH 1/2] Update delete project logic to delete objects in chunks --- lib/database.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/database.py b/lib/database.py index 9bca796f..52b435c0 100644 --- a/lib/database.py +++ b/lib/database.py @@ -143,10 +143,12 @@ class Database(object): if bucket_exists(bucket = bucket, config=self.config): page_objects_to_delete = self.list_objects_keys(bucket=bucket, prefix=f"{project_name}/") if page_objects_to_delete: - delete_keys = {'Objects' : []} - delete_keys['Objects'] = [{'Key' : k} for k in page_objects_to_delete] - if delete_keys['Objects']: - self.config.s3_client.delete_objects(Bucket=self.config.s3_connector_bucket, Delete=delete_keys) + max_objects_per_batch = 1000 + for i in range(0, len(page_objects_to_delete), max_objects_per_batch): + batch = page_objects_to_delete[i:i + max_objects_per_batch] + delete_keys = {'Objects': [{'Key': k} for k in batch]} + if delete_keys['Objects']: + self.config.s3_client.delete_objects(Bucket=self.config.s3_connector_bucket, Delete=delete_keys) self.create_connection() cleanup_sqls = ["DELETE FROM landing_zone WHERE project_name=%s AND data_provider_id=%s;", -- GitLab From b57a686d5fcffc4ee0e7a1440389510ffeb442c8 Mon Sep 17 00:00:00 2001 From: Nicola Stoira <nicola.stoira@accenture.com> Date: Tue, 10 Dec 2024 11:43:02 +0100 Subject: [PATCH 2/2] Reuse existing logic --- lib/database.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/database.py b/lib/database.py index 52b435c0..a92a60d4 100644 --- a/lib/database.py +++ b/lib/database.py @@ -143,10 +143,8 @@ class Database(object): if bucket_exists(bucket = bucket, config=self.config): page_objects_to_delete = self.list_objects_keys(bucket=bucket, prefix=f"{project_name}/") if page_objects_to_delete: - max_objects_per_batch = 1000 - for i in range(0, len(page_objects_to_delete), max_objects_per_batch): - batch = page_objects_to_delete[i:i + max_objects_per_batch] - delete_keys = {'Objects': [{'Key': k} for k in batch]} + for chunk in Database._split_list_into_chunks(object_names=page_objects_to_delete): + delete_keys = {'Objects': [{'Key': k} for k in chunk]} if delete_keys['Objects']: self.config.s3_client.delete_objects(Bucket=self.config.s3_connector_bucket, Delete=delete_keys) -- GitLab