#!/usr/bin/env python3
"""
Cloud Backup Sync Daemon

Watches /opt/immich directories for changes using pyinotify and triggers
rclone sync after a configurable cooldown period.

Writes status to /tmp/cloud_backup_status.json for the API to read.
Logs to /opt/media-downloader/logs/cloud_backup.log.

Signals:
    SIGHUP  - Force config reload from database
    SIGTERM - Graceful shutdown
"""

import json
import logging
import os
import re
import signal
import subprocess
import sys
import threading
import time
from datetime import datetime
from pathlib import Path

# Bootstrap database backend: importing db_bootstrap patches sqlite3 so that
# SettingsManager transparently talks to PostgreSQL (see SM_DB_PATH below).
sys.path.insert(0, '/opt/media-downloader')
import modules.db_bootstrap  # noqa: E402,F401

import pyinotify

from modules.settings_manager import SettingsManager
from modules.universal_logger import get_logger

# ============================================================================
# CONFIGURATION
# ============================================================================

# Key under which this daemon's settings dict is stored by SettingsManager.
SETTINGS_KEY = "cloud_backup"

# NOTE(review): database credentials are hardcoded here (and again in
# _run_pre_sync_dumps as PGPASSWORD) — consider sourcing them from the
# environment or a secrets store instead of the repository.
DB_PATH = os.environ.get(
    "DATABASE_URL",
    "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
)

# SettingsManager uses the patched sqlite3 module via db_bootstrap, so we just
# pass any path — pgadapter routes it to PostgreSQL.
SM_DB_PATH = "/opt/media-downloader/database/media_downloader.db"

# Root of the watched tree; include_dirs/exclude_dirs below are relative to it.
IMMICH_BASE = Path("/opt/immich")

# rclone configuration: the crypt remote wraps the plain remote, so an
# encrypted sync targets "<crypt>:<dir>" while a plain one targets
# "<remote>:<bucket>/<dir>" (see run_file_sync / run_full_backup).
RCLONE_CONF = Path("/root/.config/rclone/rclone.conf")
RCLONE_CRYPT_NAME = "cloud-backup-crypt"
RCLONE_REMOTE_NAME = "cloud-backup-remote"

# IPC files shared with the API process (status out, manual triggers in).
STATUS_FILE = Path("/tmp/cloud_backup_status.json")
TRIGGER_FILE = Path("/tmp/cloud_backup_trigger.json")
LOG_FILE = Path("/opt/media-downloader/logs/cloud_backup.log")

CONFIG_RELOAD_INTERVAL = 60  # seconds between periodic config reloads
MIN_INOTIFY_WATCHES = 524288  # floor for fs.inotify.max_user_watches

# Defaults merged under whatever is stored in the database (stored keys win).
DEFAULT_CONFIG = {
    "enabled": False,
    "provider": "b2",
    "endpoint": "",
    "bucket": "",
    "key_id": "",
    "application_key": "",
    "encryption_enabled": True,
    "encryption_password": "",
    "encryption_salt": "",
    "include_dirs": ["paid", "private", "md", "el", "elv", "ela", "upload",
                     "review", "recycle", "db_dumps", "app_backup"],
    "exclude_dirs": ["lost+found", "db.old"],
    "cooldown_seconds": 300,  # quiet period after last change before syncing
    "bandwidth_limit": None,  # rclone --bwlimit value, e.g. "10M"; None = unlimited
}

# ============================================================================
# LOGGING
# ============================================================================

logger = get_logger('CloudBackup')

# Add a sync-log-specific handler to the CloudBackup logger only (not root).
# This writes to cloud_backup.log for the config page's log viewer.
# INFO level to exclude DEBUG inotify spam.
_sync_log_handler = logging.FileHandler(str(LOG_FILE))
_sync_log_handler.setLevel(logging.INFO)
_sync_log_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'))
logging.getLogger('MediaDownloader.CloudBackup').addHandler(_sync_log_handler)
# Disable propagation so sync log lines do not duplicate into the root logger.
logging.getLogger('MediaDownloader.CloudBackup').propagate = False

# ============================================================================
# GLOBAL STATE
# ============================================================================

# dirty_dirs: maps dir_name -> timestamp of last change (per-directory cooldown)
dirty_dirs = {}  # type: dict[str, float]
dirty_dirs_lock = threading.Lock()

config_lock = threading.Lock()
current_config = dict(DEFAULT_CONFIG)

shutdown_event = threading.Event()  # set by SIGTERM / KeyboardInterrupt
force_reload = threading.Event()    # set by SIGHUP to reload config immediately

# syncing_lock only guards writes to the currently_syncing flag; readers
# check it unlocked (a benign race — worst case one extra 10s loop tick).
syncing_lock = threading.Lock()
currently_syncing = False

# Last-sync bookkeeping reported via the status file.
error_count = 0
last_sync_time = None
last_sync_status = None
last_error = None
files_watched = 0

# Progress tracking for manual sync jobs (reported via status file)
active_job_id = None
sync_current_dir = None
sync_dirs_synced = 0
sync_dirs_total = 0
sync_phase = None          # preparing | checking | syncing | transferring | completed
sync_transfer_stats = None  # parsed rclone stats dict, see _parse_rclone_stats
sync_current_file = None
sync_started_at = None
sync_completed_at = None
sync_error = None

# ============================================================================
# STATUS FILE
# ============================================================================


def write_status():
    """Write current state to status JSON for API consumption.

    Snapshot of daemon state (idle/cooldown/syncing/stopped), last-sync
    bookkeeping, pending dirty directories, and — while a job is running —
    per-directory progress fields. Best-effort: failures only log a warning.
    """
    global files_watched
    with dirty_dirs_lock:
        pending_dirs = list(dirty_dirs.keys())
        has_pending = len(pending_dirs) > 0
        oldest_change = min(dirty_dirs.values()) if dirty_dirs else 0.0

    state = "stopped"
    if shutdown_event.is_set():
        state = "stopped"
    elif currently_syncing:
        state = "syncing"
    elif has_pending:
        state = "cooldown"
    else:
        state = "idle"

    # Remaining cooldown is measured from the OLDEST pending change, so the
    # API shows the soonest possible sync time.
    cooldown_remaining = 0
    if has_pending and not currently_syncing:
        cooldown = current_config.get('cooldown_seconds', 300)
        elapsed = time.time() - oldest_change
        cooldown_remaining = max(0, int(cooldown - elapsed))

    status = {
        "state": state,
        "last_sync": last_sync_time,
        "last_sync_status": last_sync_status,
        "error_count": error_count,
        "last_error": last_error,
        "files_watched": files_watched,
        "cooldown_remaining": cooldown_remaining,
        "sync_pending": has_pending,
        "pending_dirs": pending_dirs,
        "updated_at": datetime.now().isoformat(),
    }

    # Job progress fields — include whenever syncing (manual or automatic)
    if active_job_id or currently_syncing:
        status["job_id"] = active_job_id or "auto"
        status["current_dir"] = sync_current_dir
        status["dirs_synced"] = sync_dirs_synced
        status["dirs_total"] = sync_dirs_total
        status["phase"] = sync_phase
        status["transfer_stats"] = sync_transfer_stats
        status["current_file"] = sync_current_file
        status["started_at"] = sync_started_at
        status["completed_at"] = sync_completed_at
        status["sync_error"] = sync_error

    # Get storage used from rclone about (cached, updated during sync)
    storage_file = Path("/tmp/cloud_backup_storage.json")
    if storage_file.exists():
        try:
            storage = json.loads(storage_file.read_text())
            status["storage_used"] = storage.get("used")
        except (json.JSONDecodeError, OSError):
            pass

    try:
        STATUS_FILE.write_text(json.dumps(status))
    except OSError as e:
        logger.warning(f"Failed to write status file: {e}")


# ============================================================================
# CONFIG LOADING
# ============================================================================


def load_config():
    """Load configuration from SettingsManager (PostgreSQL via pgadapter).

    Merges stored values over DEFAULT_CONFIG under config_lock; on any
    failure the previous configuration is kept unchanged.
    """
    global current_config
    try:
        sm = SettingsManager(SM_DB_PATH)
        stored = sm.get(SETTINGS_KEY)
        if stored and isinstance(stored, dict):
            with config_lock:
                current_config = {**DEFAULT_CONFIG, **stored}
            logger.debug("Configuration loaded from database")
        else:
            logger.debug("No cloud backup config in database, using defaults")
    except Exception as e:
        logger.error(f"Failed to load config: {e}")


def config_reload_loop():
    """Periodically reload config from database.

    Waits CONFIG_RELOAD_INTERVAL between reloads; force_reload (SIGHUP)
    wakes the wait early for an immediate reload.
    """
    while not shutdown_event.is_set():
        if force_reload.wait(timeout=CONFIG_RELOAD_INTERVAL):
            force_reload.clear()
        if shutdown_event.is_set():
            break
        load_config()


# ============================================================================
# INOTIFY HANDLER
# ============================================================================


class ChangeHandler(pyinotify.ProcessEvent):
    """Handle filesystem change events, tracking which include_dir changed."""

    def process_default(self, event):
        # Skip temporary files (dotfiles and *.tmp)
        if event.name and (event.name.startswith('.') or event.name.endswith('.tmp')):
            return
        # Map the changed path to its top-level include_dir
        try:
            rel = Path(event.pathname).relative_to(IMMICH_BASE)
            top_dir = rel.parts[0] if rel.parts else None
        except (ValueError, IndexError):
            top_dir = None
        if not top_dir:
            return
        with config_lock:
            include_dirs = current_config.get('include_dirs', [])
        if top_dir not in include_dirs:
            return
        # Refresh the per-directory timestamp; cooldown_loop syncs it once
        # cooldown_seconds have passed without further changes.
        with dirty_dirs_lock:
            dirty_dirs[top_dir] = time.time()
        logger.debug(f"Change detected in {top_dir}/: {event.pathname}")


# ============================================================================
# PRE-SYNC DUMPS (databases + app archive + system configs)
# ============================================================================


def _find_ssd_mount():
    """Auto-detect the SSD underlying a mergerfs mount at IMMICH_BASE.

    Reads /proc/mounts twice: first to map mountpoint -> device, then to find
    the mergerfs line for IMMICH_BASE and test each member branch's backing
    block device for rotational == '0' (i.e. an SSD). Returns the first SSD
    branch path, or None if detection fails for any reason.
    """
    try:
        mounts = {}
        with open('/proc/mounts') as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 2 and parts[0].startswith('/dev/'):
                    mounts[parts[1]] = parts[0]
        with open('/proc/mounts') as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 3 and parts[1] == str(IMMICH_BASE) and 'mergerfs' in parts[2]:
                    # mergerfs source field is a colon-separated branch list
                    drive_names = parts[0].split(':')
                    for name in drive_names:
                        name = name.strip()
                        if not name:
                            continue
                        candidates = [name, f'/mnt/{name}']
                        for drive_path in candidates:
                            if drive_path in mounts:
                                dev = mounts[drive_path]
                                # Strip the partition number to get the disk,
                                # e.g. /dev/sda1 -> /dev/sda
                                dev_base = dev.rstrip('0123456789')
                                dev_name = dev_base.split('/')[-1]
                                rotational = Path(f'/sys/block/{dev_name}/queue/rotational')
                                if rotational.exists() and rotational.read_text().strip() == '0':
                                    return Path(drive_path)
    except Exception:
        pass
    return None


# Dumps/archives are written to the SSD branch when one is found, otherwise
# directly under IMMICH_BASE.
_FAST_BASE = _find_ssd_mount() or IMMICH_BASE
DB_DUMPS_DIR = _FAST_BASE / "db_dumps"
APP_BACKUP_DIR = _FAST_BASE / "app_backup"
# NOTE(review): duplicates RCLONE_CONF defined at the top of the file —
# consider consolidating to one constant.
RCLONE_CONF_PATH = Path("/root/.config/rclone/rclone.conf")

# Unit files copied into the app backup so the stack can be restored.
SYSTEMD_FILES = [
    "media-downloader.service",
    "media-downloader-api.service",
    "media-downloader-frontend.service",
    "media-downloader-db-cleanup.service",
    "media-downloader-db-cleanup.timer",
    "xvfb-media-downloader.service",
    "cloud-backup-sync.service",
]


def _run_pre_sync_dumps(include_immich_db=True, include_app_archive=True):
    """Dump databases and archive app + system configs before syncing.

    Args:
        include_immich_db: If False, skip Immich DB dump (only needed daily).
        include_app_archive: If False, skip app tar.gz + system configs
            (only needed daily).

    Returns:
        List of error strings (empty if everything succeeded). Each step is
        independently best-effort: a failure is recorded and the rest run.
    """
    DB_DUMPS_DIR.mkdir(parents=True, exist_ok=True)
    APP_BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    errors = []

    # ── 1. Database dumps ──────────────────────────────────────────────
    # Immich PostgreSQL (Docker container) — only during daily full backup
    if include_immich_db:
        try:
            logger.info("Dumping Immich database...")
            # Clear any leftover dump from a previous failed run
            subprocess.run(
                ["docker", "exec", "immich_postgres", "rm", "-rf", "/tmp/immich_dump"],
                capture_output=True, text=True, timeout=10
            )
            # Parallel directory-format dump; the ML vector tables are
            # excluded (face_search/smart_search can be regenerated).
            result = subprocess.run(
                ["docker", "exec", "immich_postgres", "pg_dump",
                 "-U", "postgres", "-d", "immich",
                 "--no-owner", "--no-acl", "-Fd", "-j", "4",
                 "--exclude-table-data=face_search",
                 "--exclude-table-data=smart_search",
                 "-f", "/tmp/immich_dump"],
                capture_output=True, text=True, timeout=600
            )
            if result.returncode == 0:
                # Tar the directory-format dump inside the container so a
                # single file can be copied out.
                subprocess.run(
                    ["docker", "exec", "immich_postgres", "tar", "cf",
                     "/tmp/immich_dump.tar", "-C", "/tmp", "immich_dump"],
                    capture_output=True, text=True, timeout=120
                )
                # docker cp directly to SSD-backed DB_DUMPS_DIR
                final_dest = str(DB_DUMPS_DIR / "immich_dump.tar")
                subprocess.run(
                    ["docker", "cp", "immich_postgres:/tmp/immich_dump.tar", final_dest],
                    capture_output=True, text=True, timeout=120
                )
                subprocess.run(
                    ["docker", "exec", "immich_postgres", "sh", "-c",
                     "rm -rf /tmp/immich_dump /tmp/immich_dump.tar"],
                    capture_output=True, text=True, timeout=10
                )
                size_mb = Path(final_dest).stat().st_size / 1e6
                logger.info(f"Immich DB dump: {size_mb:.1f} MB")
            else:
                errors.append(f"Immich pg_dump failed: {result.stderr[:200]}")
                logger.error(f"Immich pg_dump failed: {result.stderr[:200]}")
        except Exception as e:
            errors.append(f"Immich pg_dump error: {e}")
            logger.error(f"Immich pg_dump error: {e}")

    # Media Downloader PostgreSQL (local) — parallel directory-format dump
    # Uses --lock-wait-timeout to avoid hanging when scheduler holds locks,
    # with a serial (-j 1) retry if parallel dump fails on lock contention.
    try:
        logger.info("Dumping Media Downloader database...")
        import shutil
        # Dump directly to SSD-backed DB_DUMPS_DIR
        final_dump_dir = str(DB_DUMPS_DIR / "media_downloader_dump")
        if Path(final_dump_dir).exists():
            shutil.rmtree(final_dump_dir)
        env = os.environ.copy()
        # NOTE(review): hardcoded credential — see DB_PATH note at top of file.
        env["PGPASSWORD"] = "PNsihOXvvuPwWiIvGlsc9Fh2YmMmB"
        base_cmd = [
            "pg_dump", "-h", "localhost", "-U", "media_downloader",
            "-d", "media_downloader",
            "--no-owner", "--no-acl", "--exclude-table-data=thumbnails",
            "--lock-wait-timeout=30000",
        ]
        # Try parallel dump first
        result = subprocess.run(
            base_cmd + ["-Fd", "-j", "4", "-f", final_dump_dir],
            capture_output=True, text=True, timeout=600, env=env
        )
        if result.returncode != 0 and "could not obtain lock" in result.stderr:
            # Lock contention — retry serial after a short wait
            logger.warning("Parallel pg_dump hit lock contention, retrying serial in 15s...")
            if Path(final_dump_dir).exists():
                shutil.rmtree(final_dump_dir)
            time.sleep(15)
            result = subprocess.run(
                base_cmd + ["-Fd", "-j", "1", "-f", final_dump_dir],
                capture_output=True, text=True, timeout=600, env=env
            )
        if result.returncode == 0:
            total = sum(f.stat().st_size for f in Path(final_dump_dir).rglob('*') if f.is_file())
            logger.info(f"Media Downloader DB dump: {total / 1e6:.1f} MB")
        else:
            errors.append(f"Media Downloader pg_dump failed: {result.stderr[:200]}")
            logger.error(f"Media Downloader pg_dump failed: {result.stderr[:200]}")
    except Exception as e:
        errors.append(f"Media Downloader pg_dump error: {e}")
        logger.error(f"Media Downloader pg_dump error: {e}")

    # ── 2-6: App archive + system configs (daily only) ─────────────────
    if include_app_archive:
        try:
            logger.info("Archiving media-downloader app...")
            import shutil  # NOTE(review): unused in this branch
            # Write directly to SSD-backed APP_BACKUP_DIR
            final_archive = APP_BACKUP_DIR / "media-downloader-app.tar.gz"
            result = subprocess.run(
                ["tar", "czf", str(final_archive),
                 "--exclude=./venv", "--exclude=./web/frontend/node_modules",
                 "--exclude=./logs", "--exclude=./cache/thumbnails",
                 "--exclude=./__pycache__", "--exclude=./.git",
                 "--exclude=./temp",
                 "-C", "/opt", "media-downloader"],
                capture_output=True, text=True, timeout=600
            )
            if result.returncode == 0:
                size_mb = final_archive.stat().st_size / 1e6
                logger.info(f"App archive: {size_mb:.1f} MB")
            else:
                errors.append(f"App archive failed: {result.stderr[:200]}")
                logger.error(f"App archive failed: {result.stderr[:200]}")
        except Exception as e:
            errors.append(f"App archive error: {e}")
            logger.error(f"App archive error: {e}")

        # Copy systemd unit files needed to restore the stack
        try:
            services_dir = APP_BACKUP_DIR / "systemd"
            services_dir.mkdir(parents=True, exist_ok=True)
            copied = 0
            for svc in SYSTEMD_FILES:
                src = Path(f"/etc/systemd/system/{svc}")
                if src.exists():
                    (services_dir / svc).write_text(src.read_text())
                    copied += 1
            logger.info(f"Copied {copied} systemd service files")
        except Exception as e:
            errors.append(f"Systemd backup error: {e}")
            logger.error(f"Systemd backup error: {e}")

        # rclone config (needed to decrypt/restore the remote)
        try:
            if RCLONE_CONF_PATH.exists():
                (APP_BACKUP_DIR / "rclone.conf").write_text(RCLONE_CONF_PATH.read_text())
                logger.info("Copied rclone.conf")
        except Exception as e:
            errors.append(f"rclone config backup error: {e}")
            logger.error(f"rclone config backup error: {e}")

        # Immich compose + env for container recreation
        try:
            compose_src = IMMICH_BASE / "docker-compose.yml"
            if compose_src.exists():
                (APP_BACKUP_DIR / "immich-docker-compose.yml").write_text(compose_src.read_text())
            env_src = IMMICH_BASE / ".env"
            if env_src.exists():
                (APP_BACKUP_DIR / "immich-env").write_text(env_src.read_text())
            logger.info("Copied Immich docker-compose + .env")
        except Exception as e:
            errors.append(f"Immich compose backup error: {e}")
            logger.error(f"Immich compose backup error: {e}")

        # Restore script, surfaced prominently as RESTORE.sh
        try:
            restore_src = Path("/opt/media-downloader/scripts/cloud_backup_restore.sh")
            if restore_src.exists():
                (APP_BACKUP_DIR / "RESTORE.sh").write_text(restore_src.read_text())
                logger.info("Copied restore script as RESTORE.sh")
        except Exception as e:
            errors.append(f"Restore script backup error: {e}")
            logger.error(f"Restore script backup error: {e}")

    return errors
# ============================================================================
# RCLONE STATS PARSER
# ============================================================================


def _parse_rclone_stats(line):
    """Parse an rclone --stats-one-line output line into structured data.

    Actual rclone format:
        INFO : 1.424 GiB / 1.424 GiB, 100%, 0 B/s, ETA -
        INFO : 1.424 GiB / 1.424 GiB, 100%, 10.5 MiB/s, ETA 7m30s, Checks: 100, Transferred: 15 / 45

    Returns a dict with whichever of bytes_done/bytes_total/pct/speed/eta/
    files_done/files_total/checks_done/checks_total were present, or None
    when the line contains no recognizable stats.
    """
    stats = {}
    try:
        m = re.search(r'([\d.]+\s*\S*?B)\s*/\s*([\d.]+\s*\S*?B),\s*(\d+)%,\s*([\d.]+\s*\S*?/s)', line)
        if m:
            stats['bytes_done'] = m.group(1).strip()
            stats['bytes_total'] = m.group(2).strip()
            stats['pct'] = int(m.group(3))
            stats['speed'] = m.group(4).strip()
        m_eta = re.search(r'ETA\s+([\w.]+)', line)
        if m_eta and m_eta.group(1) != '-':
            stats['eta'] = m_eta.group(1)
        m_files = re.search(r'Transferred:\s*(\d+)\s*/\s*(\d+)', line)
        if m_files:
            stats['files_done'] = int(m_files.group(1))
            stats['files_total'] = int(m_files.group(2))
        m_checks = re.search(r'Checks:\s*(\d+)', line)
        if m_checks:
            stats['checks_done'] = int(m_checks.group(1))
        m_chk = re.search(r'chk#(\d+)/(\d+)', line)
        if m_chk:
            stats['checks_done'] = int(m_chk.group(1))
            stats['checks_total'] = int(m_chk.group(2))
    except (ValueError, AttributeError):
        pass
    return stats if stats else None


# ============================================================================
# SYNC
# ============================================================================


def _sync_directories(config, dirs_to_sync):
    """Run `rclone sync` for each directory with live progress tracking.

    Shared engine for file syncs and full backups (previously duplicated in
    both). Streams rclone's stderr, mirrors it into LOG_FILE, parses
    --stats-one-line output into the module-level sync_* progress globals,
    and calls write_status() as progress changes.

    Args:
        config: Snapshot of the daemon configuration dict.
        dirs_to_sync: Directory names (relative to IMMICH_BASE) to sync.

    Returns:
        List of error strings; empty if every directory synced cleanly.
    """
    global sync_current_dir, sync_dirs_synced, sync_transfer_stats
    global sync_current_file, sync_phase

    encryption_enabled = config.get('encryption_enabled', True)
    bandwidth_limit = config.get('bandwidth_limit')
    bucket = config.get('bucket', '')
    sync_errors = []

    for i, dir_name in enumerate(dirs_to_sync):
        if shutdown_event.is_set():
            break
        sync_current_dir = dir_name
        sync_dirs_synced = i
        sync_transfer_stats = None
        sync_current_file = None
        sync_phase = "checking"
        write_status()

        src = str(IMMICH_BASE / dir_name)
        if encryption_enabled:
            # Crypt remote already targets the bucket; path is just the dir.
            dest = f"{RCLONE_CRYPT_NAME}:{dir_name}"
        else:
            dest = f"{RCLONE_REMOTE_NAME}:{bucket}/{dir_name}"

        cmd = [
            "rclone", "sync", src, dest,
            "--config", str(RCLONE_CONF),
            "--stats", "3s", "--stats-one-line", "-v",
            "--transfers", "4", "--checkers", "16",
            "--fast-list",
        ]
        if bandwidth_limit:
            cmd.extend(["--bwlimit", bandwidth_limit])

        logger.info(f"Syncing {dir_name}...")
        try:
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, text=True)
            # `with` guarantees the log handle is closed even if parsing raises.
            with open(str(LOG_FILE), 'a') as log_fh:
                ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                log_fh.write(f"\n[{ts}] Syncing {dir_name}/ ({i+1}/{len(dirs_to_sync)})\n")
                log_fh.flush()
                for line in proc.stderr:
                    stripped = line.strip()
                    if not stripped:
                        continue
                    # Detect progress/stats lines
                    is_progress = ('%,' in stripped and '/s' in stripped) or 'chk#' in stripped
                    # Strip syslog priority + level prefixes before logging
                    clean = re.sub(r'^<\d+>', '', stripped)
                    clean = re.sub(r'^(INFO|DEBUG|ERROR|NOTICE)\s*:\s*', '', clean).strip()
                    if is_progress:
                        log_fh.write(f" [progress] {clean}\n")
                    elif clean:
                        log_fh.write(f" {clean}\n")
                    log_fh.flush()
                    # Update progress globals
                    if is_progress:
                        stats = _parse_rclone_stats(stripped)
                        if stats:
                            # Determine phase from what rclone is doing right now:
                            # - checks progressing, no transfers yet/anymore -> checking
                            # - files being transferred -> transferring
                            has_active_transfer = (
                                stats.get('files_total', 0) > 0
                                and stats.get('files_done', 0) < stats.get('files_total', 0)
                            ) or (0 < stats.get('pct', 0) < 100)
                            if has_active_transfer:
                                phase = 'transferring'
                            else:
                                phase = 'checking'
                                # Clear stale current_file when back to checking
                                sync_current_file = None
                            sync_transfer_stats = stats
                            sync_phase = phase
                            write_status()
                    if ': Copied' in stripped or ': Moved' in stripped:
                        m = re.search(r'INFO\s*:\s*(.+?):\s*(?:Copied|Moved)', stripped)
                        if m:
                            fname = m.group(1).strip()
                            sync_current_file = fname.split('/')[-1] if '/' in fname else fname
                            sync_phase = 'transferring'
                            write_status()
            proc.wait()
            if proc.returncode != 0:
                err = f"rclone sync failed for {dir_name}: exit code {proc.returncode}"
                sync_errors.append(err)
                logger.error(err)
            else:
                logger.info(f"Synced {dir_name} successfully")
        except Exception as e:
            sync_errors.append(f"Sync error for {dir_name}: {e}")
            logger.error(f"Sync error for {dir_name}: {e}")

    return sync_errors


def _record_sync_result(sync_errors, dirs_count):
    """Update last-sync bookkeeping globals after a sync pass.

    Shared epilogue for file syncs and full backups: records timestamp,
    status, error counters, and clears per-directory progress fields.
    """
    global error_count, last_sync_time, last_sync_status, last_error
    global sync_current_dir, sync_dirs_synced, sync_current_file, sync_transfer_stats

    last_sync_time = datetime.now().isoformat()
    if sync_errors:
        last_sync_status = "error"
        last_error = sync_errors[-1]
        error_count += len(sync_errors)
    else:
        last_sync_status = "success"
        last_error = None
    sync_dirs_synced = dirs_count
    sync_current_dir = None
    sync_current_file = None
    sync_transfer_stats = None


def run_file_sync(target_dirs=None):
    """Execute rclone sync for specified directories (or all if target_dirs is None).

    Args:
        target_dirs: Optional list of directory names to sync. If None, syncs
            all include_dirs (used by manual triggers and full backups).

    Skips (and clears pending dirty dirs) when backup is disabled or the
    remote credentials/bucket are missing. Sets currently_syncing around
    the inner implementation.
    """
    global currently_syncing

    with config_lock:
        config = dict(current_config)
    if not config.get('enabled'):
        logger.info("Sync skipped: cloud backup disabled")
        # Clear any pending dirs since we can't sync anyway
        with dirty_dirs_lock:
            dirty_dirs.clear()
        return
    if not config.get('key_id') or not config.get('bucket'):
        logger.warning("Sync skipped: missing credentials or bucket")
        with dirty_dirs_lock:
            dirty_dirs.clear()
        return

    with syncing_lock:
        currently_syncing = True
    try:
        _run_file_sync_inner(config, target_dirs)
    finally:
        with syncing_lock:
            currently_syncing = False


def _run_file_sync_inner(config, target_dirs):
    """Inner implementation of run_file_sync (wrapped in try/finally by caller)."""
    global sync_dirs_total, sync_dirs_synced, sync_phase
    global sync_started_at, sync_completed_at, sync_error

    if target_dirs is not None:
        # Targeted sync: only sync specified dirs, remove them from dirty_dirs
        with dirty_dirs_lock:
            for d in target_dirs:
                dirty_dirs.pop(d, None)
        dirs_to_sync = [d for d in target_dirs if (IMMICH_BASE / d).is_dir()]
    else:
        # Full sync: sync all include_dirs, clear entire dirty_dirs
        include_dirs = config.get('include_dirs', [])
        with dirty_dirs_lock:
            dirty_dirs.clear()
        dirs_to_sync = [d for d in include_dirs if (IMMICH_BASE / d).is_dir()]

    sync_dirs_total = len(dirs_to_sync)
    sync_dirs_synced = 0
    sync_phase = "syncing"
    if not active_job_id:
        # Auto sync — set started_at for progress tracking
        sync_started_at = datetime.now().isoformat()
        sync_completed_at = None
        sync_error = None
    write_status()

    logger.info(f"Starting file sync of {len(dirs_to_sync)} directories")
    sync_errors = _sync_directories(config, dirs_to_sync)

    # Update storage info
    _update_storage_info(config)
    _record_sync_result(sync_errors, len(dirs_to_sync))
    sync_completed_at = datetime.now().isoformat()
    if sync_errors:
        sync_error = sync_errors[-1]
    logger.info(f"File sync completed: {'with errors' if sync_errors else 'success'}")
    write_status()


def run_full_backup():
    """Run DB dumps + app archive + rclone sync (daily scheduled backup).

    Uses the same engine and progress tracking as run_file_sync(). For daily
    scheduled backups, includes Immich DB + app archive. Skips entirely when
    disabled or credentials/bucket are missing.
    """
    global currently_syncing

    with config_lock:
        config = dict(current_config)
    if not config.get('enabled'):
        logger.info("Full backup skipped: cloud backup disabled")
        return
    if not config.get('key_id') or not config.get('bucket'):
        logger.warning("Full backup skipped: missing credentials or bucket")
        return

    with syncing_lock:
        currently_syncing = True
    try:
        _run_full_backup_inner(config)
    finally:
        with syncing_lock:
            currently_syncing = False


def _run_full_backup_inner(config):
    """Inner implementation of run_full_backup (wrapped in try/finally by caller)."""
    global sync_dirs_total, sync_dirs_synced, sync_phase

    with dirty_dirs_lock:
        dirty_dirs.clear()

    sync_phase = "preparing"
    write_status()

    # Run DB dumps + app archive
    logger.info("Running daily pre-sync dumps (DB + app archive)...")
    dump_errors = _run_pre_sync_dumps()

    # Then sync all directories (including db_dumps and app_backup)
    include_dirs = config.get('include_dirs', [])
    dirs_to_sync = [d for d in include_dirs if (IMMICH_BASE / d).is_dir()]

    sync_errors = list(dump_errors)
    sync_dirs_total = len(dirs_to_sync)
    sync_dirs_synced = 0
    sync_phase = "syncing"
    write_status()

    logger.info(f"Starting full backup sync of {len(dirs_to_sync)} directories")
    sync_errors.extend(_sync_directories(config, dirs_to_sync))

    # Update storage info
    _update_storage_info(config)
    _record_sync_result(sync_errors, len(dirs_to_sync))
    logger.info(f"Full backup completed: {'with errors' if sync_errors else 'success'}")
    write_status()


def _update_storage_info(config: dict):
    """Update cached storage usage from rclone about.

    Writes {"used", "total"} to /tmp/cloud_backup_storage.json for
    write_status() to pick up. Best-effort: failures are only debug-logged.
    """
    bucket = config.get('bucket', '')
    encryption_enabled = config.get('encryption_enabled', True)
    if encryption_enabled:
        remote = f"{RCLONE_CRYPT_NAME}:"
    else:
        remote = f"{RCLONE_REMOTE_NAME}:{bucket}"
    try:
        result = subprocess.run(
            ["rclone", "about", remote, "--config", str(RCLONE_CONF), "--json"],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0:
            data = json.loads(result.stdout)
            storage_file = Path("/tmp/cloud_backup_storage.json")
            storage_file.write_text(json.dumps({"used": data.get("used"),
                                                "total": data.get("total")}))
    except Exception as e:
        logger.debug(f"Failed to get storage info: {e}")


# ============================================================================
# COOLDOWN LOOP
# ============================================================================


def _check_trigger_file():
    """Check for and process a trigger file from the API.

    Returns True if a trigger was found and processed.
    """
    global active_job_id, sync_started_at, sync_completed_at, sync_error, sync_phase

    if not TRIGGER_FILE.exists():
        return False
    if currently_syncing:
        logger.debug("Trigger file found but already syncing, ignoring")
        return False

    try:
        trigger_data = json.loads(TRIGGER_FILE.read_text())
        TRIGGER_FILE.unlink(missing_ok=True)
    except (json.JSONDecodeError, OSError) as e:
        logger.warning(f"Failed to read trigger file: {e}")
        # Remove the unreadable trigger so it doesn't retrigger every tick
        try:
            TRIGGER_FILE.unlink(missing_ok=True)
        except OSError:
            pass
        return False

    trigger_type = trigger_data.get("type", "sync")
    job_id = trigger_data.get("job_id", str(int(time.time())))
    logger.info(f"Trigger file received: type={trigger_type}, job_id={job_id}")

    # Set up job tracking
    active_job_id = job_id
    sync_started_at = datetime.now().isoformat()
    sync_completed_at = None
    sync_error = None
    sync_phase = "preparing"
    write_status()

    try:
        if trigger_type == "full_backup":
            run_full_backup()
        else:
            # Default: file sync with MD DB dump only
            _run_manual_sync()
    except Exception as e:
        sync_error = str(e)
        logger.error(f"Triggered sync failed: {e}")

    # Mark job complete — write final status with job data, then clear job_id
    sync_completed_at = datetime.now().isoformat()
    if sync_error is None and last_sync_status == "error":
        sync_error = last_error
    sync_phase = "completed"
    write_status()
    logger.info(f"Triggered sync completed: job_id={job_id}")
    # Keep job_id in status file for 60s so API can read completion status,
    # then clear it on next cooldown_loop iteration (active_job_id stays set
    # until overwritten by the next trigger or cleared after the API reads it)
    return True


def _run_manual_sync():
    """Run a manual sync: Media Downloader DB dump + file sync (no Immich DB, no app archive)."""
    global sync_phase
    sync_phase = "preparing"
    write_status()
    logger.info("Running pre-sync dumps for manual sync (MD DB only)...")
    dump_errors = _run_pre_sync_dumps(include_immich_db=False, include_app_archive=False)
    if dump_errors:
        logger.warning(f"Pre-sync dump errors: {dump_errors}")
    run_file_sync()


def cooldown_loop():
    """Check every 10s if per-directory cooldowns have elapsed, or process API triggers."""
    while not shutdown_event.is_set():
        shutdown_event.wait(timeout=10)
        if shutdown_event.is_set():
            break
        write_status()

        # Check for API trigger file (higher priority than inotify cooldown)
        if not currently_syncing:
            try:
                _check_trigger_file()
            except Exception as e:
                logger.error(f"Trigger file check failed: {e}")

        if currently_syncing:
            continue

        # Check which dirty dirs have had their cooldown elapse
        with config_lock:
            cooldown = current_config.get('cooldown_seconds', 300)
        now = time.time()
        ready_dirs = []
        with dirty_dirs_lock:
            for dir_name, change_time in list(dirty_dirs.items()):
                if now - change_time >= cooldown:
                    ready_dirs.append(dir_name)
        if not ready_dirs:
            continue

        logger.info(f"Cooldown elapsed ({cooldown}s), syncing {len(ready_dirs)} directories: {', '.join(ready_dirs)}")
        try:
            run_file_sync(target_dirs=ready_dirs)
        except Exception as e:
            logger.error(f"File sync failed: {e}")


DAILY_BACKUP_HOUR = 3  # Run full backup at 3 AM


def daily_backup_loop():
    """Run full backup (DB dumps + app archive + sync) once daily at DAILY_BACKUP_HOUR."""
    last_run_date = None
    while not shutdown_event.is_set():
        shutdown_event.wait(timeout=60)
        if shutdown_event.is_set():
            break
        now = datetime.now()
        today = now.date()
        # Skip if already ran today or not the right hour yet
        if last_run_date == today:
            continue
        if now.hour != DAILY_BACKUP_HOUR:
            continue
        # Don't start if a file sync is in progress
        if currently_syncing:
            logger.info("Daily backup deferred: sync in progress")
            continue
        logger.info("Starting daily full backup (DB dumps + app archive + sync)")
        last_run_date = today
        try:
            run_full_backup()
        except Exception as e:
            logger.error(f"Daily full backup failed: {e}")


# ============================================================================
# INOTIFY WATCHES
# ============================================================================


def ensure_inotify_limit():
    """Ensure fs.inotify.max_user_watches is high enough.

    Raises the sysctl to MIN_INOTIFY_WATCHES when the current value is lower
    (requires root); failures are logged and ignored.
    """
    try:
        current = int(Path("/proc/sys/fs/inotify/max_user_watches").read_text().strip())
        if current < MIN_INOTIFY_WATCHES:
            logger.info(f"Increasing inotify watches from {current} to {MIN_INOTIFY_WATCHES}")
            Path("/proc/sys/fs/inotify/max_user_watches").write_text(str(MIN_INOTIFY_WATCHES))
    except (OSError, ValueError) as e:
        logger.warning(f"Could not check/set inotify watches: {e}")


def setup_watches(wm: "pyinotify.WatchManager") -> int:
    """Set up recursive inotify watches on all configured include dirs.

    Returns the number of directories watched, which is also stored in the
    module-level files_watched for status reporting.
    """
    global files_watched
    with config_lock:
        include_dirs = current_config.get('include_dirs', [])
        exclude_dirs = current_config.get('exclude_dirs', [])

    mask = (
        pyinotify.IN_CREATE | pyinotify.IN_MODIFY | pyinotify.IN_DELETE |
        pyinotify.IN_MOVED_FROM | pyinotify.IN_MOVED_TO | pyinotify.IN_CLOSE_WRITE
    )

    def exclude_filter(path):
        # Exclude configured dirs and hidden dirs (applies recursively).
        basename = os.path.basename(path)
        return basename in exclude_dirs or basename.startswith('.')

    watch_count = 0
    for dir_name in include_dirs:
        dir_path = IMMICH_BASE / dir_name
        if not dir_path.is_dir():
            logger.debug(f"Skipping non-existent directory: {dir_path}")
            continue
        wdd = wm.add_watch(
            str(dir_path), mask,
            rec=True, auto_add=True,
            exclude_filter=exclude_filter,
        )
        for path, wd in wdd.items():
            if wd > 0:
                watch_count += 1
            else:
                logger.warning(f"Failed to watch: {path}")

    files_watched = watch_count
    logger.info(f"Watching {watch_count} directories")
    return watch_count


# ============================================================================
# SIGNAL HANDLERS
# ============================================================================


def handle_sighup(signum, frame):
    """Force config reload."""
    logger.info("SIGHUP received, forcing config reload")
    force_reload.set()


def handle_sigterm(signum, frame):
    """Graceful shutdown."""
    logger.info("SIGTERM received, shutting down")
    shutdown_event.set()
    # Clean up stale trigger file
    try:
        TRIGGER_FILE.unlink(missing_ok=True)
    except OSError:
        pass
# ============================================================================
# MAIN
# ============================================================================


def main():
    """Daemon entry point.

    Installs signal handlers, loads config, starts the config-reload,
    cooldown-sync, and daily-backup worker threads, then runs the pyinotify
    event loop in the main thread until SIGTERM / Ctrl-C.
    """
    logger.info("Cloud Backup Sync Daemon starting")

    # Signal handlers
    signal.signal(signal.SIGHUP, handle_sighup)
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Clean up stale trigger file from previous run
    try:
        TRIGGER_FILE.unlink(missing_ok=True)
    except OSError:
        pass

    # Ensure inotify limit
    ensure_inotify_limit()

    # Load initial config
    load_config()
    with config_lock:
        if not current_config.get('enabled'):
            logger.info("Cloud backup is disabled, waiting for config change...")

    # Start config reload thread
    config_thread = threading.Thread(target=config_reload_loop, daemon=True, name="config-reload")
    config_thread.start()

    # Start cooldown/sync thread (file changes only)
    cooldown_thread = threading.Thread(target=cooldown_loop, daemon=True, name="cooldown-sync")
    cooldown_thread.start()

    # Start daily backup thread (DB dumps + app archive + full sync at 3 AM)
    daily_thread = threading.Thread(target=daily_backup_loop, daemon=True, name="daily-backup")
    daily_thread.start()

    # Set up pyinotify
    wm = pyinotify.WatchManager()
    handler = ChangeHandler()
    notifier = pyinotify.Notifier(wm, handler, timeout=5000)  # 5s poll timeout
    setup_watches(wm)

    write_status()
    logger.info("Daemon ready, entering main loop")

    try:
        while not shutdown_event.is_set():
            # Check for events (non-blocking with timeout)
            if notifier.check_events(timeout=5000):
                notifier.read_events()
                notifier.process_events()
            # Periodically re-check if config changed (dirs added/removed)
            with config_lock:
                enabled = current_config.get('enabled', False)
            if not enabled:
                # Disabled: back off so we don't spin; events are still
                # queued by the kernel and handled once re-enabled.
                time.sleep(10)
                continue
    except KeyboardInterrupt:
        logger.info("Keyboard interrupt, shutting down")
    finally:
        shutdown_event.set()
        notifier.stop()
        write_status()
        logger.info("Cloud Backup Sync Daemon stopped")


if __name__ == "__main__":
    main()