Set timeout for backup cycle to take care of backup results
diff --git a/requirements.txt b/requirements.txt
index 45ae2d5..9c3aad4 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,7 @@
oslo.db>=5.0.0
oslo.config>=8.1.0
oslo.log>=4.4.0 # Apache-2.0
+oslo_versionedobjects
openstacksdk>0.28.0
pymysql
diff --git a/staffeln/common/constants.py b/staffeln/common/constants.py
index d0ec6fe..91b2a95 100644
--- a/staffeln/common/constants.py
+++ b/staffeln/common/constants.py
@@ -3,3 +3,7 @@
BACKUP_PLANNED=0
BACKUP_ENABLED_KEY = 'true'
+BACKUP_RESULT_CHECK_INTERVAL = 60 # second
+
+# default config values
+DEFAULT_BACKUP_CYCLE_TIMEOUT="5min"
\ No newline at end of file
diff --git a/staffeln/common/time.py b/staffeln/common/time.py
index 3059085..a4f3c80 100644
--- a/staffeln/common/time.py
+++ b/staffeln/common/time.py
@@ -5,7 +5,7 @@
DEFAULT_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
regex = re.compile(
- r'((?P<years>\d+?)y)?((?P<months>\d+?)m)?((?P<weeks>\d+?)w)?((?P<days>\d+?)d)?'
+ r'((?P<years>\d+?)y)?((?P<months>\d+?)m)?((?P<weeks>\d+?)w)?((?P<days>\d+?)d)?((?P<hours>\d+?)h)?((?P<minutes>\d+?)min)?((?P<seconds>\d+?)s)?'
)
@@ -37,10 +37,12 @@
return now.strftime(DEFAULT_TIME_FORMAT)
-def timeago(years, months, weeks, days, from_date=None):
+def timeago(years=0, months=0, weeks=0, days=0, hours=0, minutes=0, seconds=0, from_date=None):
if from_date is None:
from_date = datetime.now()
- return from_date - relativedelta(years=years, months=months, weeks=weeks, days=days)
+ return from_date - relativedelta(years=years, months=months,
+ weeks=weeks, days=days, hours=hours,
+ minutes=minutes, seconds=seconds)
## yearsago using Standard library
# def yearsago(years, from_date=None):
diff --git a/staffeln/conductor/backup.py b/staffeln/conductor/backup.py
index f358c8a..3cd48fe 100755
--- a/staffeln/conductor/backup.py
+++ b/staffeln/conductor/backup.py
@@ -87,12 +87,34 @@
except OpenstackResourceNotFound:
return False
- def remove_volume_backup(self, backup_object):
+ # delete all backups forcily regardless of the status
+ def hard_cancel_volume_backup(self, task):
+ try:
+ backup = conn.get_volume_backup(task.backup_id)
+ if backup == None: return task.delete_queue()
+
+ conn.delete_volume_backup(task.backup_id, force=True)
+ task.delete_queue()
+
+ except OpenstackResourceNotFound:
+ task.delete_queue()
+
+ except OpenstackSDKException as e:
+ LOG.info(_("Backup %s deletion failed."
+ "%s" % (task.backup_id, str(e))))
+ # TODO(Alex): When backup timeout and cancel failed
+ # 1. notify
+ # 2. set the volume status as in-use
+ # remove from the queue table
+ task.delete_queue()
+
+ # delete only available backups
+ def soft_remove_volume_backup(self, backup_object):
try:
backup = conn.get_volume_backup(backup_object.backup_id)
- if backup == None: return False
+ if backup == None: return backup_object.delete_backup()
if backup["status"] in ("available"):
- conn.delete_volume_backup(backup_object.backup_id)
+ conn.delete_volume_backup(backup_object.backup_id, force=True)
backup_object.delete_backup()
elif backup["status"] in ("error", "error_restoring"):
# TODO(Alex): need to discuss
@@ -111,6 +133,37 @@
backup_object.delete_backup()
return False
+ except OpenstackSDKException as e:
+ LOG.info(_("Backup %s deletion failed."
+ "%s" % (backup_object.backup_id, str(e))))
+ # TODO(Alex): Add it into the notification queue
+ # remove from the backup table
+ backup_object.delete_backup()
+ return False
+
+
+ # delete all backups forcily regardless of the status
+ def hard_remove_volume_backup(self, backup_object):
+ try:
+ backup = conn.get_volume_backup(backup_object.backup_id)
+ if backup == None: return backup_object.delete_backup()
+
+ conn.delete_volume_backup(backup_object.backup_id, force=True)
+ backup_object.delete_backup()
+
+ except OpenstackResourceNotFound:
+ LOG.info(_("Backup %s is not existing in Openstack."
+ "Or cinder-backup is not existing in the cloud." % backup_object.backup_id))
+ # remove from the backup table
+ backup_object.delete_backup()
+
+ except OpenstackSDKException as e:
+ LOG.info(_("Backup %s deletion failed."
+ "%s" % (backup_object.backup_id, str(e))))
+ # TODO(Alex): Add it into the notification queue
+ # remove from the backup table
+ backup_object.delete_backup()
+
def check_instance_volumes(self):
"""Get the list of all the volumes from the project using openstacksdk
Function first list all the servers in the project and get the volumes
@@ -163,12 +216,13 @@
volume_backup = conn.create_volume_backup(
volume_id=queue.volume_id, force=True
)
- queue.backup_id = volume_backup.id
- queue.backup_status = constants.BACKUP_WIP
- queue.save()
except OpenstackSDKException as error:
LOG.info(_("Backup creation for the volume %s failled. %s"
% (queue.volume_id, str(error))))
+
+ queue.backup_id = volume_backup.id
+ queue.backup_status = constants.BACKUP_WIP
+ queue.save()
else:
pass
# TODO(Alex): remove this task from the task list
@@ -214,7 +268,7 @@
backup_gen = conn.get_volume_backup(queue.backup_id)
if backup_gen == None:
# TODO(Alex): need to check when it is none
- LOG.info(_("Backup status of %s is returning none."%(queue.backup_id)))
+ LOG.info(_("Backup status of %s is returning none." % (queue.backup_id)))
return
if backup_gen.status == "error":
self.process_failed_backup(queue)
diff --git a/staffeln/conductor/manager.py b/staffeln/conductor/manager.py
index 6898348..aaa8773 100755
--- a/staffeln/conductor/manager.py
+++ b/staffeln/conductor/manager.py
@@ -1,5 +1,4 @@
import cotyledon
-import datetime
from futurist import periodics
from oslo_log import log
import staffeln.conf
@@ -64,11 +63,42 @@
# That is required to make the backup report
def _process_wip_tasks(self):
LOG.info(_("Processing WIP backup generators..."))
- queues_started = backup.Backup().get_queues(
- filters={"backup_status": constants.BACKUP_WIP}
+ # TODO(Alex): Replace this infinite loop with finite time
+ self.cycle_start_time = xtime.get_current_time()
+
+ # loop - take care of backup result
+ while(1):
+ queues_started = backup.Backup().get_queues(
+ filters={"backup_status": constants.BACKUP_WIP}
+ )
+ if len(queues_started) == 0: break
+ if not self._backup_cycle_timeout():# time in
+ for queue in queues_started: backup.Backup().check_volume_backup_status(queue)
+ else: # time out
+ for queue in queues_started: backup.Backup().hard_cancel_volume_backup(queue)
+ time.sleep(constants.BACKUP_RESULT_CHECK_INTERVAL)
+
+ # if the backup cycle timeout, then return True
+ def _backup_cycle_timeout(self):
+ time_delta_dict = xtime.parse_timedelta_string(CONF.conductor.backup_cycle_timout)
+
+ if time_delta_dict == None:
+ LOG.info(_("Recycle timeout format is invalid. "
+ "Follow <YEARS>y<MONTHS>m<WEEKS>w<DAYS>d<HOURS>h<MINUTES>min<SECONDS>s."))
+ time_delta_dict = xtime.parse_timedelta_string(constants.DEFAULT_BACKUP_CYCLE_TIMEOUT)
+
+ rto = xtime.timeago(
+ years=time_delta_dict["years"],
+ months=time_delta_dict["months"],
+ weeks=time_delta_dict["weeks"],
+ days=time_delta_dict["days"],
+ hours=time_delta_dict["hours"],
+ minutes=time_delta_dict["minutes"],
+ seconds=time_delta_dict["seconds"],
)
- if len(queues_started) != 0:
- for queue in queues_started: backup.Backup().check_volume_backup_status(queue)
+ if rto >= self.cycle_start_time:
+ return True
+ return False
# Create backup generators
def _process_todo_tasks(self):
@@ -92,7 +122,6 @@
self.failed_backup_list = []
notify.SendBackupResultEmail(self.success_backup_list, self.failed_backup_list)
-
@periodics.periodic(spacing=CONF.conductor.backup_service_period, run_immediately=True)
def backup_engine(self):
LOG.info("backing... %s" % str(time.time()))
@@ -101,14 +130,12 @@
if self._check_quota(): return
# NOTE(Alex): If _process_wip_tasks() waits tiil no WIP tasks
# exist, no need to repeat this function before and after queue update.
- self._process_wip_tasks()
self._update_task_queue()
self._process_todo_tasks()
self._process_wip_tasks()
self._report_backup_result()
-
class RotationManager(cotyledon.Service):
name = "Staffeln conductor rotation controller"
@@ -145,8 +172,7 @@
def remove_backups(self):
print(self.backup_list)
for retention_backup in self.backup_list:
- # 1. check the backup status and delete only available backups
- backup.Backup().remove_volume_backup(retention_backup)
+ backup.Backup().hard_remove_volume_backup(retention_backup)
@periodics.periodic(spacing=CONF.conductor.retention_service_period, run_immediately=True)
def rotation_engine(self):
@@ -160,15 +186,18 @@
# get the threshold time str
def get_threshold_strtime(self):
time_delta_dict = xtime.parse_timedelta_string(CONF.conductor.retention_time)
- if time_delta_dict == None: return None
+ if time_delta_dict == None:
+ LOG.info(_("Retention time format is invalid. "
+ "Follow <YEARS>y<MONTHS>m<WEEKS>w<DAYS>d<HOURS>h<MINUTES>min<SECONDS>s."))
+ return None
res = xtime.timeago(
years=time_delta_dict["years"],
months=time_delta_dict["months"],
weeks=time_delta_dict["weeks"],
days=time_delta_dict["days"],
+ hours=time_delta_dict["hours"],
+ minutes=time_delta_dict["minutes"],
+ seconds=time_delta_dict["seconds"],
)
- if res == None: LOG.info(_("Retention time format is invalid. "
- "Follow <YEARS>y<MONTHS>m<WEEKS>w<DAYS>d."))
-
return res.strftime(xtime.DEFAULT_TIME_FORMAT)
diff --git a/staffeln/conductor/notify.py b/staffeln/conductor/notify.py
index d80da4e..2f77aa9 100644
--- a/staffeln/conductor/notify.py
+++ b/staffeln/conductor/notify.py
@@ -54,4 +54,3 @@
LOG.info(_("Backup result email sent"))
except Exception as e:
LOG.error(_("Backup result email send failed. Please check email configuration. %s" % (str(e))))
-
diff --git a/staffeln/conf/conductor.py b/staffeln/conf/conductor.py
index 4e2aad9..8912748 100755
--- a/staffeln/conf/conductor.py
+++ b/staffeln/conf/conductor.py
@@ -1,4 +1,5 @@
from oslo_config import cfg
+from staffeln.common import constants
from staffeln.i18n import _
conductor_group = cfg.OptGroup(
@@ -21,6 +22,13 @@
help=_("The time of bakup period, the unit is one minute."),
),
cfg.StrOpt(
+ "backup_cycle_timout",
+ regex=r'((?P<years>\d+?)y)?((?P<months>\d+?)m)?((?P<weeks>\d+?)w)?((?P<days>\d+?)d)?((?P<hours>\d+?)h)?((?P<minutes>\d+?)min)?((?P<seconds>\d+?)s)?',
+ default=constants.DEFAULT_BACKUP_CYCLE_TIMEOUT,
+ help=_("The duration while the backup cycle waits backups."
+ "<YEARS>y<MONTHS>m<WEEKS>w<DAYS>d<HOURS>h<MINUTES>min<SECONDS>s."),
+ ),
+ cfg.StrOpt(
"backup_metadata_key",
default="__automated_backup",
help=_("The key string of metadata the VM, which requres back up, has"),
@@ -53,9 +61,10 @@
),
cfg.StrOpt(
"retention_time",
+ regex=r'((?P<years>\d+?)y)?((?P<months>\d+?)m)?((?P<weeks>\d+?)w)?((?P<days>\d+?)d)?((?P<hours>\d+?)h)?((?P<minutes>\d+?)min)?((?P<seconds>\d+?)s)?',
default="2w3d",
help=_("The time of retention period, the for mat is "
- "<YEARS>y<MONTHS>m<WEEKS>w<DAYS>d."),
+ "<YEARS>y<MONTHS>m<WEEKS>w<DAYS>d<HOURS>h<MINUTES>min<SECONDS>s."),
),
]