Files
media-downloader/scripts/cloud_backup_sync.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

1228 lines
44 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Cloud Backup Sync Daemon
Watches /opt/immich directories for changes using pyinotify and triggers
rclone sync after a configurable cooldown period.
Writes status to /tmp/cloud_backup_status.json for the API to read.
Logs to /opt/media-downloader/logs/cloud_backup.log.
Signals:
SIGHUP - Force config reload from database
SIGTERM - Graceful shutdown
"""
import json
import logging
import os
import re
import signal
import subprocess
import sys
import threading
import time
from datetime import datetime
from pathlib import Path
# Bootstrap database backend
sys.path.insert(0, '/opt/media-downloader')
import modules.db_bootstrap # noqa: E402,F401
import pyinotify
from modules.settings_manager import SettingsManager
from modules.universal_logger import get_logger
# ============================================================================
# CONFIGURATION
# ============================================================================
# Settings key under which the cloud-backup config is stored via SettingsManager.
SETTINGS_KEY = "cloud_backup"
# Main app database connection string.
# NOTE(review): a real password is hardcoded as the fallback — prefer requiring
# DATABASE_URL from the environment and rotating this credential.
DB_PATH = os.environ.get(
    "DATABASE_URL",
    "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
)
# SettingsManager uses the patched sqlite3 module via db_bootstrap, so we just
# pass any path — pgadapter routes it to PostgreSQL.
SM_DB_PATH = "/opt/media-downloader/database/media_downloader.db"
# Root of the watched media tree; include_dirs below are relative to this.
IMMICH_BASE = Path("/opt/immich")
# rclone config file plus the remote names used for (optionally encrypted) sync.
RCLONE_CONF = Path("/root/.config/rclone/rclone.conf")
RCLONE_CRYPT_NAME = "cloud-backup-crypt"
RCLONE_REMOTE_NAME = "cloud-backup-remote"
# IPC files: the daemon publishes status; the API drops trigger requests.
STATUS_FILE = Path("/tmp/cloud_backup_status.json")
TRIGGER_FILE = Path("/tmp/cloud_backup_trigger.json")
LOG_FILE = Path("/opt/media-downloader/logs/cloud_backup.log")
CONFIG_RELOAD_INTERVAL = 60  # seconds between periodic config reloads from the DB
MIN_INOTIFY_WATCHES = 524288  # floor enforced for fs.inotify.max_user_watches
# Defaults; values stored in the database are merged over these.
DEFAULT_CONFIG = {
    "enabled": False,
    "provider": "b2",
    "endpoint": "",
    "bucket": "",
    "key_id": "",
    "application_key": "",
    "encryption_enabled": True,
    "encryption_password": "",
    "encryption_salt": "",
    "include_dirs": ["paid", "private", "md", "el", "elv", "ela", "upload", "review", "recycle", "db_dumps", "app_backup"],
    "exclude_dirs": ["lost+found", "db.old"],
    "cooldown_seconds": 300,
    "bandwidth_limit": None,
}
# ============================================================================
# LOGGING
# ============================================================================
# Module logger (child of the app-wide universal logger hierarchy).
logger = get_logger('CloudBackup')
# Add a sync-log-specific handler to the CloudBackup logger only (not root).
# This writes to cloud_backup.log for the config page's log viewer.
# INFO level to exclude DEBUG inotify spam.
_sync_log_handler = logging.FileHandler(str(LOG_FILE))
_sync_log_handler.setLevel(logging.INFO)
_sync_log_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'))
logging.getLogger('MediaDownloader.CloudBackup').addHandler(_sync_log_handler)
# Prevent records from bubbling up to parent handlers (avoids double-logging).
logging.getLogger('MediaDownloader.CloudBackup').propagate = False
# ============================================================================
# GLOBAL STATE
# ============================================================================
# dirty_dirs: maps dir_name -> timestamp of last change (per-directory cooldown)
dirty_dirs = {}  # type: dict[str, float]
dirty_dirs_lock = threading.Lock()     # guards dirty_dirs
config_lock = threading.Lock()         # guards current_config
current_config = dict(DEFAULT_CONFIG)  # live config, periodically refreshed from DB
shutdown_event = threading.Event()     # set by SIGTERM for graceful shutdown
force_reload = threading.Event()       # set by SIGHUP to reload config immediately
syncing_lock = threading.Lock()        # guards the currently_syncing flag
currently_syncing = False              # True while an rclone sync is running
# Aggregate sync bookkeeping, surfaced through the status file.
error_count = 0
last_sync_time = None
last_sync_status = None
last_error = None
files_watched = 0
# Progress tracking for manual sync jobs (reported via status file)
active_job_id = None
sync_current_dir = None
sync_dirs_synced = 0
sync_dirs_total = 0
sync_phase = None
sync_transfer_stats = None
sync_current_file = None
sync_started_at = None
sync_completed_at = None
sync_error = None
# ============================================================================
# STATUS FILE
# ============================================================================
def write_status():
    """Write the daemon's current state to STATUS_FILE for API consumption.

    Gathers pending-directory info, derives a coarse state, computes the
    cooldown countdown, and — when a sync job is active — includes per-job
    progress fields. A failure to write the file is logged, never raised:
    status reporting must not take the daemon down.
    """
    with dirty_dirs_lock:
        pending_dirs = list(dirty_dirs.keys())
        has_pending = len(pending_dirs) > 0
        oldest_change = min(dirty_dirs.values()) if dirty_dirs else 0.0

    # Coarse state: shutdown wins, then an active sync, then pending cooldown.
    if shutdown_event.is_set():
        state = "stopped"
    elif currently_syncing:
        state = "syncing"
    elif has_pending:
        state = "cooldown"
    else:
        state = "idle"

    cooldown_remaining = 0
    if has_pending and not currently_syncing:
        # Read config under its lock, consistent with every other reader.
        with config_lock:
            cooldown = current_config.get('cooldown_seconds', 300)
        elapsed = time.time() - oldest_change
        cooldown_remaining = max(0, int(cooldown - elapsed))

    status = {
        "state": state,
        "last_sync": last_sync_time,
        "last_sync_status": last_sync_status,
        "error_count": error_count,
        "last_error": last_error,
        "files_watched": files_watched,
        "cooldown_remaining": cooldown_remaining,
        "sync_pending": has_pending,
        "pending_dirs": pending_dirs,
        "updated_at": datetime.now().isoformat(),
    }

    # Job progress fields — include whenever syncing (manual or automatic)
    if active_job_id or currently_syncing:
        status["job_id"] = active_job_id or "auto"
        status["current_dir"] = sync_current_dir
        status["dirs_synced"] = sync_dirs_synced
        status["dirs_total"] = sync_dirs_total
        status["phase"] = sync_phase
        status["transfer_stats"] = sync_transfer_stats
        status["current_file"] = sync_current_file
        status["started_at"] = sync_started_at
        status["completed_at"] = sync_completed_at
        status["sync_error"] = sync_error

    # Storage usage is cached during syncs (see _update_storage_info).
    storage_file = Path("/tmp/cloud_backup_storage.json")
    if storage_file.exists():
        try:
            storage = json.loads(storage_file.read_text())
            status["storage_used"] = storage.get("used")
        except (json.JSONDecodeError, OSError):
            pass  # stale/corrupt cache: omit storage_used rather than fail

    try:
        STATUS_FILE.write_text(json.dumps(status))
    except OSError as e:
        logger.warning(f"Failed to write status file: {e}")
# ============================================================================
# CONFIG LOADING
# ============================================================================
def load_config():
    """Refresh current_config from the settings store (best-effort).

    Stored values are merged over DEFAULT_CONFIG under config_lock. On any
    failure the previous config is kept and the error is logged.
    """
    global current_config
    try:
        stored = SettingsManager(SM_DB_PATH).get(SETTINGS_KEY)
        if isinstance(stored, dict) and stored:
            with config_lock:
                current_config = {**DEFAULT_CONFIG, **stored}
            logger.debug("Configuration loaded from database")
        else:
            logger.debug("No cloud backup config in database, using defaults")
    except Exception as e:
        logger.error(f"Failed to load config: {e}")
def config_reload_loop():
    """Background thread: reload the config every CONFIG_RELOAD_INTERVAL seconds.

    A set force_reload event (raised by SIGHUP) wakes the wait early and is
    cleared before reloading. Exits promptly on shutdown.
    """
    while not shutdown_event.is_set():
        woke_early = force_reload.wait(timeout=CONFIG_RELOAD_INTERVAL)
        if woke_early:
            force_reload.clear()
        if shutdown_event.is_set():
            return
        load_config()
# ============================================================================
# INOTIFY HANDLER
# ============================================================================
class ChangeHandler(pyinotify.ProcessEvent):
    """Route filesystem events into the dirty-directory tracker.

    Each event's path is mapped to its top-level directory under IMMICH_BASE;
    when that directory is in the configured include list, its last-change
    timestamp is recorded for the cooldown loop to act on.
    """

    def process_default(self, event):
        entry_name = event.name
        # Ignore hidden and temporary files.
        if entry_name and (entry_name.startswith('.') or entry_name.endswith('.tmp')):
            return
        # Resolve the changed path to its top-level include_dir.
        try:
            parts = Path(event.pathname).relative_to(IMMICH_BASE).parts
            top_dir = parts[0] if parts else None
        except (ValueError, IndexError):
            top_dir = None
        if not top_dir:
            return
        with config_lock:
            watched = current_config.get('include_dirs', [])
        if top_dir not in watched:
            return
        with dirty_dirs_lock:
            dirty_dirs[top_dir] = time.time()
        logger.debug(f"Change detected in {top_dir}/: {event.pathname}")
# ============================================================================
# PRE-SYNC DUMPS (databases + app archive + system configs)
# ============================================================================
def _find_ssd_mount():
    """Return the SSD branch backing the mergerfs mount at IMMICH_BASE, or None.

    Scans /proc/mounts first to build a mountpoint -> device map, then finds
    the mergerfs entry for IMMICH_BASE and probes each of its branch drives
    for a non-rotational (SSD) backing device via sysfs. Fully best-effort:
    any error results in None.
    """
    try:
        device_by_mountpoint = {}
        with open('/proc/mounts') as fh:
            for entry in fh:
                fields = entry.split()
                if len(fields) >= 2 and fields[0].startswith('/dev/'):
                    device_by_mountpoint[fields[1]] = fields[0]
        with open('/proc/mounts') as fh:
            for entry in fh:
                fields = entry.split()
                if len(fields) < 3 or fields[1] != str(IMMICH_BASE) or 'mergerfs' not in fields[2]:
                    continue
                # mergerfs source is a colon-separated list of branch names.
                for branch in fields[0].split(':'):
                    branch = branch.strip()
                    if not branch:
                        continue
                    for mountpoint in (branch, f'/mnt/{branch}'):
                        if mountpoint not in device_by_mountpoint:
                            continue
                        device = device_by_mountpoint[mountpoint]
                        # Strip partition digits to get the block device name.
                        block_name = device.rstrip('0123456789').split('/')[-1]
                        rotational_flag = Path(f'/sys/block/{block_name}/queue/rotational')
                        if rotational_flag.exists() and rotational_flag.read_text().strip() == '0':
                            return Path(mountpoint)
    except Exception:
        pass  # best-effort; caller falls back to IMMICH_BASE
    return None
# Prefer an SSD branch for dump/archive staging; fall back to the pool root.
_FAST_BASE = _find_ssd_mount() or IMMICH_BASE
DB_DUMPS_DIR = _FAST_BASE / "db_dumps"      # staging area for database dumps
APP_BACKUP_DIR = _FAST_BASE / "app_backup"  # staging area for app archive + configs
# NOTE(review): duplicates RCLONE_CONF above; used by the config-copy step.
RCLONE_CONF_PATH = Path("/root/.config/rclone/rclone.conf")
# Systemd units copied into the app backup so the full stack can be restored.
SYSTEMD_FILES = [
    "media-downloader.service",
    "media-downloader-api.service",
    "media-downloader-frontend.service",
    "media-downloader-db-cleanup.service",
    "media-downloader-db-cleanup.timer",
    "xvfb-media-downloader.service",
    "cloud-backup-sync.service",
]
def _dump_immich_db(errors):
    """Dump the Immich PostgreSQL DB (Docker container) to DB_DUMPS_DIR/immich_dump.tar.

    Runs a parallel directory-format pg_dump inside the container, tars the
    result, and `docker cp`s it out to the SSD-backed staging dir. Any
    failure is appended to *errors* and logged.
    """
    try:
        logger.info("Dumping Immich database...")
        # Clear any leftover dump from a previous run.
        subprocess.run(
            ["docker", "exec", "immich_postgres", "rm", "-rf", "/tmp/immich_dump"],
            capture_output=True, text=True, timeout=10
        )
        result = subprocess.run(
            ["docker", "exec", "immich_postgres",
             "pg_dump", "-U", "postgres", "-d", "immich",
             "--no-owner", "--no-acl", "-Fd", "-j", "4",
             "--exclude-table-data=face_search",
             "--exclude-table-data=smart_search",
             "-f", "/tmp/immich_dump"],
            capture_output=True, text=True, timeout=600
        )
        if result.returncode != 0:
            errors.append(f"Immich pg_dump failed: {result.stderr[:200]}")
            logger.error(f"Immich pg_dump failed: {result.stderr[:200]}")
            return
        subprocess.run(
            ["docker", "exec", "immich_postgres",
             "tar", "cf", "/tmp/immich_dump.tar", "-C", "/tmp", "immich_dump"],
            capture_output=True, text=True, timeout=120
        )
        # docker cp directly to SSD-backed DB_DUMPS_DIR
        final_dest = str(DB_DUMPS_DIR / "immich_dump.tar")
        subprocess.run(
            ["docker", "cp", "immich_postgres:/tmp/immich_dump.tar", final_dest],
            capture_output=True, text=True, timeout=120
        )
        subprocess.run(
            ["docker", "exec", "immich_postgres",
             "sh", "-c", "rm -rf /tmp/immich_dump /tmp/immich_dump.tar"],
            capture_output=True, text=True, timeout=10
        )
        size_mb = Path(final_dest).stat().st_size / 1e6
        logger.info(f"Immich DB dump: {size_mb:.1f} MB")
    except Exception as e:
        errors.append(f"Immich pg_dump error: {e}")
        logger.error(f"Immich pg_dump error: {e}")


def _dump_media_downloader_db(errors):
    """Dump the local Media Downloader PostgreSQL DB to DB_DUMPS_DIR.

    Tries a parallel directory-format dump with --lock-wait-timeout so it
    cannot hang when the scheduler holds locks; on lock contention, retries
    serially (-j 1) after a short wait. Failures are appended to *errors*.
    """
    import shutil
    try:
        logger.info("Dumping Media Downloader database...")
        # Dump directly to SSD-backed DB_DUMPS_DIR
        final_dump_dir = str(DB_DUMPS_DIR / "media_downloader_dump")
        if Path(final_dump_dir).exists():
            shutil.rmtree(final_dump_dir)
        env = os.environ.copy()
        # NOTE(review): hardcoded DB password — should come from config/env.
        env["PGPASSWORD"] = "PNsihOXvvuPwWiIvGlsc9Fh2YmMmB"
        base_cmd = [
            "pg_dump", "-h", "localhost", "-U", "media_downloader",
            "-d", "media_downloader", "--no-owner", "--no-acl",
            "--exclude-table-data=thumbnails",
            "--lock-wait-timeout=30000",
        ]
        # Try parallel dump first
        result = subprocess.run(
            base_cmd + ["-Fd", "-j", "4", "-f", final_dump_dir],
            capture_output=True, text=True, timeout=600, env=env
        )
        if result.returncode != 0 and "could not obtain lock" in result.stderr:
            # Lock contention — retry serial after a short wait
            logger.warning("Parallel pg_dump hit lock contention, retrying serial in 15s...")
            if Path(final_dump_dir).exists():
                shutil.rmtree(final_dump_dir)
            time.sleep(15)
            result = subprocess.run(
                base_cmd + ["-Fd", "-j", "1", "-f", final_dump_dir],
                capture_output=True, text=True, timeout=600, env=env
            )
        if result.returncode == 0:
            total = sum(f.stat().st_size for f in Path(final_dump_dir).rglob('*') if f.is_file())
            logger.info(f"Media Downloader DB dump: {total / 1e6:.1f} MB")
        else:
            errors.append(f"Media Downloader pg_dump failed: {result.stderr[:200]}")
            logger.error(f"Media Downloader pg_dump failed: {result.stderr[:200]}")
    except Exception as e:
        errors.append(f"Media Downloader pg_dump error: {e}")
        logger.error(f"Media Downloader pg_dump error: {e}")


def _archive_app(errors):
    """Create media-downloader-app.tar.gz in APP_BACKUP_DIR, excluding caches/venv."""
    try:
        logger.info("Archiving media-downloader app...")
        # Write directly to SSD-backed APP_BACKUP_DIR
        final_archive = APP_BACKUP_DIR / "media-downloader-app.tar.gz"
        result = subprocess.run(
            ["tar", "czf", str(final_archive),
             "--exclude=./venv",
             "--exclude=./web/frontend/node_modules",
             "--exclude=./logs",
             "--exclude=./cache/thumbnails",
             "--exclude=./__pycache__",
             "--exclude=./.git",
             "--exclude=./temp",
             "-C", "/opt", "media-downloader"],
            capture_output=True, text=True, timeout=600
        )
        if result.returncode == 0:
            size_mb = final_archive.stat().st_size / 1e6
            logger.info(f"App archive: {size_mb:.1f} MB")
        else:
            errors.append(f"App archive failed: {result.stderr[:200]}")
            logger.error(f"App archive failed: {result.stderr[:200]}")
    except Exception as e:
        errors.append(f"App archive error: {e}")
        logger.error(f"App archive error: {e}")


def _copy_system_configs(errors):
    """Copy systemd units, rclone.conf, Immich compose files and the restore
    script into APP_BACKUP_DIR. Each piece is independently best-effort."""
    try:
        services_dir = APP_BACKUP_DIR / "systemd"
        services_dir.mkdir(parents=True, exist_ok=True)
        copied = 0
        for svc in SYSTEMD_FILES:
            src = Path(f"/etc/systemd/system/{svc}")
            if src.exists():
                (services_dir / svc).write_text(src.read_text())
                copied += 1
        logger.info(f"Copied {copied} systemd service files")
    except Exception as e:
        errors.append(f"Systemd backup error: {e}")
        logger.error(f"Systemd backup error: {e}")
    try:
        if RCLONE_CONF_PATH.exists():
            (APP_BACKUP_DIR / "rclone.conf").write_text(RCLONE_CONF_PATH.read_text())
            logger.info("Copied rclone.conf")
    except Exception as e:
        errors.append(f"rclone config backup error: {e}")
        logger.error(f"rclone config backup error: {e}")
    try:
        compose_src = IMMICH_BASE / "docker-compose.yml"
        if compose_src.exists():
            (APP_BACKUP_DIR / "immich-docker-compose.yml").write_text(compose_src.read_text())
        env_src = IMMICH_BASE / ".env"
        if env_src.exists():
            (APP_BACKUP_DIR / "immich-env").write_text(env_src.read_text())
        logger.info("Copied Immich docker-compose + .env")
    except Exception as e:
        errors.append(f"Immich compose backup error: {e}")
        logger.error(f"Immich compose backup error: {e}")
    try:
        restore_src = Path("/opt/media-downloader/scripts/cloud_backup_restore.sh")
        if restore_src.exists():
            (APP_BACKUP_DIR / "RESTORE.sh").write_text(restore_src.read_text())
            logger.info("Copied restore script as RESTORE.sh")
    except Exception as e:
        errors.append(f"Restore script backup error: {e}")
        logger.error(f"Restore script backup error: {e}")


def _run_pre_sync_dumps(include_immich_db=True, include_app_archive=True):
    """Dump databases and archive app + system configs before syncing.

    Args:
        include_immich_db: If False, skip Immich DB dump (only needed daily).
        include_app_archive: If False, skip app tar.gz + system configs
            (only needed daily).

    Returns:
        list[str]: human-readable error strings; empty on full success.
    """
    DB_DUMPS_DIR.mkdir(parents=True, exist_ok=True)
    APP_BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    errors = []
    # 1. Database dumps (Immich only during the daily full backup).
    if include_immich_db:
        _dump_immich_db(errors)
    _dump_media_downloader_db(errors)
    # 2-6. App archive + system configs (daily only).
    if include_app_archive:
        _archive_app(errors)
        _copy_system_configs(errors)
    return errors
# ============================================================================
# RCLONE STATS PARSER
# ============================================================================
def _parse_rclone_stats(line):
"""Parse an rclone --stats-one-line output line into structured data.
Actual rclone format:
INFO : 1.424 GiB / 1.424 GiB, 100%, 0 B/s, ETA -
INFO : 1.424 GiB / 1.424 GiB, 100%, 10.5 MiB/s, ETA 7m30s, Checks: 100, Transferred: 15 / 45
"""
stats = {}
try:
m = re.search(r'([\d.]+\s*\S*?B)\s*/\s*([\d.]+\s*\S*?B),\s*(\d+)%,\s*([\d.]+\s*\S*?/s)', line)
if m:
stats['bytes_done'] = m.group(1).strip()
stats['bytes_total'] = m.group(2).strip()
stats['pct'] = int(m.group(3))
stats['speed'] = m.group(4).strip()
m_eta = re.search(r'ETA\s+([\w.]+)', line)
if m_eta and m_eta.group(1) != '-':
stats['eta'] = m_eta.group(1)
m_files = re.search(r'Transferred:\s*(\d+)\s*/\s*(\d+)', line)
if m_files:
stats['files_done'] = int(m_files.group(1))
stats['files_total'] = int(m_files.group(2))
m_checks = re.search(r'Checks:\s*(\d+)', line)
if m_checks:
stats['checks_done'] = int(m_checks.group(1))
m_chk = re.search(r'chk#(\d+)/(\d+)', line)
if m_chk:
stats['checks_done'] = int(m_chk.group(1))
stats['checks_total'] = int(m_chk.group(2))
except (ValueError, AttributeError):
pass
return stats if stats else None
# ============================================================================
# SYNC
# ============================================================================
def run_file_sync(target_dirs=None):
    """Run an rclone sync for the given directories.

    Args:
        target_dirs: Optional list of directory names to sync. When None,
            all include_dirs are synced (manual triggers / full backups).

    Checks that backup is enabled and configured, then delegates to
    _run_file_sync_inner() while the currently_syncing flag is held.
    """
    global currently_syncing, error_count, last_sync_time, last_sync_status, last_error
    global sync_current_dir, sync_dirs_synced, sync_dirs_total, sync_phase, sync_transfer_stats, sync_current_file
    global sync_started_at, sync_completed_at, sync_error
    with config_lock:
        config = dict(current_config)
    # When syncing is impossible, drop pending work so it doesn't pile up.
    if not config.get('enabled'):
        logger.info("Sync skipped: cloud backup disabled")
        with dirty_dirs_lock:
            dirty_dirs.clear()
        return
    if not (config.get('key_id') and config.get('bucket')):
        logger.warning("Sync skipped: missing credentials or bucket")
        with dirty_dirs_lock:
            dirty_dirs.clear()
        return
    with syncing_lock:
        currently_syncing = True
    try:
        _run_file_sync_inner(config, target_dirs)
    finally:
        with syncing_lock:
            currently_syncing = False
def _run_file_sync_inner(config, target_dirs):
    """Inner implementation of run_file_sync (wrapped in try/finally by caller).

    Runs one `rclone sync` subprocess per directory, streaming its stderr to
    parse live progress (via _parse_rclone_stats) and to mirror output into
    LOG_FILE. Updates the module-level sync_* progress globals and calls
    write_status() as progress arrives.

    Args:
        config: Snapshot of current_config (taken by the caller under lock).
        target_dirs: Directory names to sync, or None for all include_dirs.
    """
    global error_count, last_sync_time, last_sync_status, last_error
    global sync_current_dir, sync_dirs_synced, sync_dirs_total, sync_phase, sync_transfer_stats, sync_current_file
    global sync_started_at, sync_completed_at, sync_error
    if target_dirs is not None:
        # Targeted sync: only sync specified dirs, remove them from dirty_dirs
        with dirty_dirs_lock:
            for d in target_dirs:
                dirty_dirs.pop(d, None)
        dirs_to_sync = [d for d in target_dirs if (IMMICH_BASE / d).is_dir()]
    else:
        # Full sync: sync all include_dirs, clear entire dirty_dirs
        include_dirs = config.get('include_dirs', [])
        with dirty_dirs_lock:
            dirty_dirs.clear()
        dirs_to_sync = [d for d in include_dirs if (IMMICH_BASE / d).is_dir()]
    encryption_enabled = config.get('encryption_enabled', True)
    bandwidth_limit = config.get('bandwidth_limit')
    bucket = config.get('bucket', '')
    sync_errors = []
    sync_dirs_total = len(dirs_to_sync)
    sync_dirs_synced = 0
    sync_phase = "syncing"
    if not active_job_id:
        # Auto sync — set started_at for progress tracking
        sync_started_at = datetime.now().isoformat()
        sync_completed_at = None
        sync_error = None
    write_status()
    logger.info(f"Starting file sync of {len(dirs_to_sync)} directories")
    for i, dir_name in enumerate(dirs_to_sync):
        if shutdown_event.is_set():
            break
        # Reset per-directory progress state before launching rclone.
        sync_current_dir = dir_name
        sync_dirs_synced = i
        sync_transfer_stats = None
        sync_current_file = None
        sync_phase = "checking"
        write_status()
        src = str(IMMICH_BASE / dir_name)
        # Encrypted remotes embed the bucket in the crypt config; plain
        # remotes need the bucket spelled out in the destination path.
        if encryption_enabled:
            dest = f"{RCLONE_CRYPT_NAME}:{dir_name}"
        else:
            dest = f"{RCLONE_REMOTE_NAME}:{bucket}/{dir_name}"
        cmd = [
            "rclone", "sync", src, dest,
            "--config", str(RCLONE_CONF),
            "--stats", "3s",
            "--stats-one-line",
            "-v",
            "--transfers", "4",
            "--checkers", "16",
            "--fast-list",
        ]
        if bandwidth_limit:
            cmd.extend(["--bwlimit", bandwidth_limit])
        logger.info(f"Syncing {dir_name}...")
        try:
            # rclone writes progress/log lines to stderr; stream them live.
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            log_fh = open(str(LOG_FILE), 'a')
            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            log_fh.write(f"\n[{ts}] Syncing {dir_name}/ ({i+1}/{len(dirs_to_sync)})\n")
            log_fh.flush()
            try:
                for line in proc.stderr:
                    stripped = line.strip()
                    if not stripped:
                        continue
                    # Detect progress/stats lines
                    is_progress = ('%,' in stripped and '/s' in stripped) or 'chk#' in stripped
                    # Write to log (strip syslog priority prefix and level tag)
                    clean = re.sub(r'^<\d+>', '', stripped)
                    clean = re.sub(r'^(INFO|DEBUG|ERROR|NOTICE)\s*:\s*', '', clean).strip()
                    if is_progress:
                        log_fh.write(f" [progress] {clean}\n")
                    elif clean:
                        log_fh.write(f" {clean}\n")
                    log_fh.flush()
                    # Update progress globals
                    if is_progress:
                        stats = _parse_rclone_stats(stripped)
                        if stats:
                            # Determine phase from what rclone is doing right now:
                            # - If checks are progressing but no transfers yet/anymore → checking
                            # - If files are being transferred (pct > 0 or files_total > 0) → transferring
                            has_active_transfer = (
                                stats.get('files_total', 0) > 0
                                and stats.get('files_done', 0) < stats.get('files_total', 0)
                            ) or (0 < stats.get('pct', 0) < 100)
                            if has_active_transfer:
                                phase = 'transferring'
                            else:
                                phase = 'checking'
                                # Clear stale current_file when back to checking
                                sync_current_file = None
                            sync_transfer_stats = stats
                            sync_phase = phase
                            write_status()
                    # Per-file transfer notices name the file just copied/moved.
                    if ': Copied' in stripped or ': Moved' in stripped:
                        m = re.search(r'INFO\s*:\s*(.+?):\s*(?:Copied|Moved)', stripped)
                        if m:
                            fname = m.group(1).strip()
                            sync_current_file = fname.split('/')[-1] if '/' in fname else fname
                            sync_phase = 'transferring'
                            write_status()
            finally:
                log_fh.close()
            proc.wait()
            if proc.returncode != 0:
                err = f"rclone sync failed for {dir_name}: exit code {proc.returncode}"
                sync_errors.append(err)
                logger.error(err)
            else:
                logger.info(f"Synced {dir_name} successfully")
        except Exception as e:
            sync_errors.append(f"Sync error for {dir_name}: {e}")
            logger.error(f"Sync error for {dir_name}: {e}")
    # Update storage info
    _update_storage_info(config)
    last_sync_time = datetime.now().isoformat()
    if sync_errors:
        last_sync_status = "error"
        last_error = sync_errors[-1]
        error_count += len(sync_errors)
    else:
        last_sync_status = "success"
        last_error = None
    sync_dirs_synced = len(dirs_to_sync)
    sync_current_dir = None
    sync_current_file = None
    sync_transfer_stats = None
    sync_completed_at = datetime.now().isoformat()
    if sync_errors:
        sync_error = sync_errors[-1]
    logger.info(f"File sync completed: {'with errors' if sync_errors else 'success'}")
    write_status()
def run_full_backup():
    """Run DB dumps + app archive + rclone sync (daily scheduled backup).

    Skips immediately when backup is disabled or unconfigured; otherwise
    delegates to _run_full_backup_inner() while the currently_syncing flag
    is held, guaranteeing the flag is cleared on any exit path.
    """
    global currently_syncing, error_count, last_sync_time, last_sync_status, last_error
    global sync_current_dir, sync_dirs_synced, sync_dirs_total, sync_phase, sync_transfer_stats, sync_current_file
    with config_lock:
        config = dict(current_config)
    if not config.get('enabled'):
        logger.info("Full backup skipped: cloud backup disabled")
        return
    if not (config.get('key_id') and config.get('bucket')):
        logger.warning("Full backup skipped: missing credentials or bucket")
        return
    with syncing_lock:
        currently_syncing = True
    try:
        _run_full_backup_inner(config)
    finally:
        with syncing_lock:
            currently_syncing = False
def _run_full_backup_inner(config):
    """Inner implementation of run_full_backup (wrapped in try/finally by caller).

    First runs the full pre-sync dumps (Immich DB + MD DB + app archive),
    then syncs every include_dir with per-directory rclone subprocesses,
    streaming stderr for live progress exactly like _run_file_sync_inner.

    Args:
        config: Snapshot of current_config (taken by the caller under lock).
    """
    global error_count, last_sync_time, last_sync_status, last_error
    global sync_current_dir, sync_dirs_synced, sync_dirs_total, sync_phase, sync_transfer_stats, sync_current_file
    with dirty_dirs_lock:
        dirty_dirs.clear()
    sync_phase = "preparing"
    write_status()
    # Run DB dumps + app archive
    logger.info("Running daily pre-sync dumps (DB + app archive)...")
    dump_errors = _run_pre_sync_dumps()
    # Then sync all directories (including db_dumps and app_backup)
    include_dirs = config.get('include_dirs', [])
    encryption_enabled = config.get('encryption_enabled', True)
    bandwidth_limit = config.get('bandwidth_limit')
    bucket = config.get('bucket', '')
    dirs_to_sync = [d for d in include_dirs if (IMMICH_BASE / d).is_dir()]
    # Dump errors count toward the overall sync result.
    sync_errors = list(dump_errors)
    sync_dirs_total = len(dirs_to_sync)
    sync_dirs_synced = 0
    sync_phase = "syncing"
    write_status()
    logger.info(f"Starting full backup sync of {len(dirs_to_sync)} directories")
    for i, dir_name in enumerate(dirs_to_sync):
        if shutdown_event.is_set():
            break
        # Reset per-directory progress state before launching rclone.
        sync_current_dir = dir_name
        sync_dirs_synced = i
        sync_transfer_stats = None
        sync_current_file = None
        sync_phase = "checking"
        write_status()
        src = str(IMMICH_BASE / dir_name)
        # Encrypted remotes embed the bucket in the crypt config; plain
        # remotes need the bucket spelled out in the destination path.
        if encryption_enabled:
            dest = f"{RCLONE_CRYPT_NAME}:{dir_name}"
        else:
            dest = f"{RCLONE_REMOTE_NAME}:{bucket}/{dir_name}"
        cmd = [
            "rclone", "sync", src, dest,
            "--config", str(RCLONE_CONF),
            "--stats", "3s",
            "--stats-one-line",
            "-v",
            "--transfers", "4",
            "--checkers", "16",
            "--fast-list",
        ]
        if bandwidth_limit:
            cmd.extend(["--bwlimit", bandwidth_limit])
        logger.info(f"Syncing {dir_name}...")
        try:
            # rclone writes progress/log lines to stderr; stream them live.
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            log_fh = open(str(LOG_FILE), 'a')
            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            log_fh.write(f"\n[{ts}] Syncing {dir_name}/ ({i+1}/{len(dirs_to_sync)})\n")
            log_fh.flush()
            try:
                for line in proc.stderr:
                    stripped = line.strip()
                    if not stripped:
                        continue
                    # Detect progress/stats lines; mirror everything to the log.
                    is_progress = ('%,' in stripped and '/s' in stripped) or 'chk#' in stripped
                    clean = re.sub(r'^<\d+>', '', stripped)
                    clean = re.sub(r'^(INFO|DEBUG|ERROR|NOTICE)\s*:\s*', '', clean).strip()
                    if is_progress:
                        log_fh.write(f" [progress] {clean}\n")
                    elif clean:
                        log_fh.write(f" {clean}\n")
                    log_fh.flush()
                    if is_progress:
                        stats = _parse_rclone_stats(stripped)
                        if stats:
                            # Phase heuristic: active transfers → transferring,
                            # otherwise rclone is still checking files.
                            has_active_transfer = (
                                stats.get('files_total', 0) > 0
                                and stats.get('files_done', 0) < stats.get('files_total', 0)
                            ) or (0 < stats.get('pct', 0) < 100)
                            if has_active_transfer:
                                phase = 'transferring'
                            else:
                                phase = 'checking'
                                # Clear stale current_file when back to checking.
                                sync_current_file = None
                            sync_transfer_stats = stats
                            sync_phase = phase
                            write_status()
                    # Per-file transfer notices name the file just copied/moved.
                    if ': Copied' in stripped or ': Moved' in stripped:
                        m = re.search(r'INFO\s*:\s*(.+?):\s*(?:Copied|Moved)', stripped)
                        if m:
                            fname = m.group(1).strip()
                            sync_current_file = fname.split('/')[-1] if '/' in fname else fname
                            sync_phase = 'transferring'
                            write_status()
            finally:
                log_fh.close()
            proc.wait()
            if proc.returncode != 0:
                err = f"rclone sync failed for {dir_name}: exit code {proc.returncode}"
                sync_errors.append(err)
                logger.error(err)
            else:
                logger.info(f"Synced {dir_name} successfully")
        except Exception as e:
            sync_errors.append(f"Sync error for {dir_name}: {e}")
            logger.error(f"Sync error for {dir_name}: {e}")
    # Update storage info
    _update_storage_info(config)
    last_sync_time = datetime.now().isoformat()
    if sync_errors:
        last_sync_status = "error"
        last_error = sync_errors[-1]
        error_count += len(sync_errors)
    else:
        last_sync_status = "success"
        last_error = None
    sync_dirs_synced = len(dirs_to_sync)
    sync_current_dir = None
    sync_current_file = None
    sync_transfer_stats = None
    logger.info(f"Full backup completed: {'with errors' if sync_errors else 'success'}")
    write_status()
def _update_storage_info(config: dict):
    """Refresh the cached storage-usage file via `rclone about` (best-effort).

    Writes {"used": ..., "total": ...} to /tmp/cloud_backup_storage.json so
    write_status() can include usage without shelling out each time.
    """
    if config.get('encryption_enabled', True):
        remote = f"{RCLONE_CRYPT_NAME}:"
    else:
        remote = f"{RCLONE_REMOTE_NAME}:{config.get('bucket', '')}"
    try:
        result = subprocess.run(
            ["rclone", "about", remote, "--config", str(RCLONE_CONF), "--json"],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode != 0:
            return
        about = json.loads(result.stdout)
        Path("/tmp/cloud_backup_storage.json").write_text(
            json.dumps({"used": about.get("used"), "total": about.get("total")})
        )
    except Exception as e:
        logger.debug(f"Failed to get storage info: {e}")
# ============================================================================
# COOLDOWN LOOP
# ============================================================================
def _check_trigger_file():
    """Consume and execute a pending API trigger file, if any.

    Reads TRIGGER_FILE, removes it, runs the requested job (a full backup
    or a manual file sync), and maintains the job-lifecycle globals used by
    the status file.

    Returns:
        True when a trigger was found and processed, False otherwise.
    """
    global active_job_id, sync_started_at, sync_completed_at, sync_error, sync_phase
    if not TRIGGER_FILE.exists():
        return False
    if currently_syncing:
        logger.debug("Trigger file found but already syncing, ignoring")
        return False
    try:
        payload = json.loads(TRIGGER_FILE.read_text())
        TRIGGER_FILE.unlink(missing_ok=True)
    except (json.JSONDecodeError, OSError) as e:
        logger.warning(f"Failed to read trigger file: {e}")
        # Drop the unreadable file so it doesn't wedge the loop.
        try:
            TRIGGER_FILE.unlink(missing_ok=True)
        except OSError:
            pass
        return False
    trigger_type = payload.get("type", "sync")
    job_id = payload.get("job_id", str(int(time.time())))
    logger.info(f"Trigger file received: type={trigger_type}, job_id={job_id}")
    # Begin job tracking for the status file.
    active_job_id = job_id
    sync_started_at = datetime.now().isoformat()
    sync_completed_at = None
    sync_error = None
    sync_phase = "preparing"
    write_status()
    try:
        if trigger_type == "full_backup":
            run_full_backup()
        else:
            _run_manual_sync()  # default: file sync with MD DB dump only
    except Exception as e:
        sync_error = str(e)
        logger.error(f"Triggered sync failed: {e}")
    # Finalize: surface any sync-level error and publish completion.
    sync_completed_at = datetime.now().isoformat()
    if sync_error is None and last_sync_status == "error":
        sync_error = last_error
    sync_phase = "completed"
    write_status()
    logger.info(f"Triggered sync completed: job_id={job_id}")
    # job_id remains in the status file so the API can observe completion;
    # active_job_id is only replaced by the next trigger.
    return True
def _run_manual_sync():
    """Manual sync: dump only the Media Downloader DB, then sync files.

    Skips the Immich DB dump and the app archive — those are only refreshed
    by the daily full backup.
    """
    global sync_phase
    sync_phase = "preparing"
    write_status()
    logger.info("Running pre-sync dumps for manual sync (MD DB only)...")
    errors = _run_pre_sync_dumps(include_immich_db=False, include_app_archive=False)
    if errors:
        logger.warning(f"Pre-sync dump errors: {errors}")
    run_file_sync()
def cooldown_loop():
    """Main polling loop, ticking every 10 seconds.

    Each tick publishes status, processes any API trigger file (which takes
    priority over inotify-driven work), and syncs every dirty directory
    whose per-directory cooldown has elapsed.
    """
    while not shutdown_event.is_set():
        shutdown_event.wait(timeout=10)
        if shutdown_event.is_set():
            return
        write_status()
        # API trigger file beats the inotify cooldown path.
        if not currently_syncing:
            try:
                _check_trigger_file()
            except Exception as e:
                logger.error(f"Trigger file check failed: {e}")
        if currently_syncing:
            continue
        with config_lock:
            cooldown = current_config.get('cooldown_seconds', 300)
        now = time.time()
        with dirty_dirs_lock:
            ready_dirs = [
                name for name, changed_at in list(dirty_dirs.items())
                if now - changed_at >= cooldown
            ]
        if not ready_dirs:
            continue
        logger.info(f"Cooldown elapsed ({cooldown}s), syncing {len(ready_dirs)} directories: {', '.join(ready_dirs)}")
        try:
            run_file_sync(target_dirs=ready_dirs)
        except Exception as e:
            logger.error(f"File sync failed: {e}")
DAILY_BACKUP_HOUR = 3 # Run full backup at 3 AM

def daily_backup_loop():
    """Background thread: run the full backup once per day at DAILY_BACKUP_HOUR.

    Wakes every 60 seconds; fires at most once per calendar day, only during
    the configured hour, and defers while another sync is already running.
    """
    last_run_date = None
    while not shutdown_event.is_set():
        shutdown_event.wait(timeout=60)
        if shutdown_event.is_set():
            return
        now = datetime.now()
        # Already ran today, or not the scheduled hour yet → wait.
        if now.date() == last_run_date or now.hour != DAILY_BACKUP_HOUR:
            continue
        if currently_syncing:
            logger.info("Daily backup deferred: sync in progress")
            continue
        logger.info("Starting daily full backup (DB dumps + app archive + sync)")
        last_run_date = now.date()
        try:
            run_full_backup()
        except Exception as e:
            logger.error(f"Daily full backup failed: {e}")
# ============================================================================
# INOTIFY WATCHES
# ============================================================================
def ensure_inotify_limit():
    """Raise fs.inotify.max_user_watches to MIN_INOTIFY_WATCHES if it is lower."""
    proc_file = Path("/proc/sys/fs/inotify/max_user_watches")
    try:
        limit = int(proc_file.read_text().strip())
    except (OSError, ValueError) as e:
        # /proc missing or unreadable (e.g. container without privileges).
        logger.warning(f"Could not check/set inotify watches: {e}")
        return
    if limit >= MIN_INOTIFY_WATCHES:
        return
    logger.info(f"Increasing inotify watches from {limit} to {MIN_INOTIFY_WATCHES}")
    try:
        proc_file.write_text(str(MIN_INOTIFY_WATCHES))
    except OSError as e:
        logger.warning(f"Could not check/set inotify watches: {e}")
def setup_watches(wm: pyinotify.WatchManager) -> int:
    """Set up recursive inotify watches on each configured include directory.

    Reads include_dirs/exclude_dirs from current_config (under config_lock),
    updates the global files_watched counter, and returns the number of
    directories successfully watched.
    """
    global files_watched
    with config_lock:
        include_dirs = current_config.get('include_dirs', [])
        exclude_dirs = current_config.get('exclude_dirs', [])
    # Events that mark a directory's contents as changed.
    mask = (
        pyinotify.IN_CREATE |
        pyinotify.IN_MODIFY |
        pyinotify.IN_DELETE |
        pyinotify.IN_MOVED_FROM |
        pyinotify.IN_MOVED_TO |
        pyinotify.IN_CLOSE_WRITE
    )

    # Hoisted out of the loop: the filter only depends on exclude_dirs,
    # which is fixed for this call, so one closure serves every add_watch.
    def exclude_filter(path):
        # Skip configured exclusions and hidden directories anywhere in the tree.
        basename = os.path.basename(path)
        return basename in exclude_dirs or basename.startswith('.')

    watch_count = 0
    for dir_name in include_dirs:
        dir_path = IMMICH_BASE / dir_name
        if not dir_path.is_dir():
            logger.debug(f"Skipping non-existent directory: {dir_path}")
            continue
        wdd = wm.add_watch(
            str(dir_path),
            mask,
            rec=True,        # watch the whole subtree
            auto_add=True,   # auto-watch directories created later
            exclude_filter=exclude_filter,
        )
        # add_watch returns {path: wd}; a negative wd means the watch failed.
        for path, wd in wdd.items():
            if wd > 0:
                watch_count += 1
            else:
                logger.warning(f"Failed to watch: {path}")
    files_watched = watch_count
    logger.info(f"Watching {watch_count} directories")
    return watch_count
# ============================================================================
# SIGNAL HANDLERS
# ============================================================================
def handle_sighup(signum, frame):
    """SIGHUP handler: request an immediate config reload from the database."""
    logger.info("SIGHUP received, forcing config reload")
    force_reload.set()
def handle_sigterm(signum, frame):
    """SIGTERM handler: begin graceful shutdown and discard any stale trigger file."""
    logger.info("SIGTERM received, shutting down")
    # Best-effort removal so the next start does not replay an old trigger.
    try:
        TRIGGER_FILE.unlink(missing_ok=True)
    except OSError:
        pass
    shutdown_event.set()
# ============================================================================
# MAIN
# ============================================================================
def main():
    """Daemon entry point: wire signal handlers, start worker threads, and run the inotify event loop."""
    logger.info("Cloud Backup Sync Daemon starting")
    # Signal handlers: SIGHUP forces a config reload, SIGTERM triggers graceful shutdown.
    signal.signal(signal.SIGHUP, handle_sighup)
    signal.signal(signal.SIGTERM, handle_sigterm)
    # Clean up stale trigger file from previous run
    try:
        TRIGGER_FILE.unlink(missing_ok=True)
    except OSError:
        pass
    # Raise fs.inotify.max_user_watches if it is below MIN_INOTIFY_WATCHES.
    ensure_inotify_limit()
    # Load initial config
    load_config()
    with config_lock:
        if not current_config.get('enabled'):
            logger.info("Cloud backup is disabled, waiting for config change...")
    # Start config reload thread (all workers are daemon threads, so they die with the process).
    config_thread = threading.Thread(target=config_reload_loop, daemon=True, name="config-reload")
    config_thread.start()
    # Start cooldown/sync thread (file changes only)
    cooldown_thread = threading.Thread(target=cooldown_loop, daemon=True, name="cooldown-sync")
    cooldown_thread.start()
    # Start daily backup thread (DB dumps + app archive + full sync at 3 AM)
    daily_thread = threading.Thread(target=daily_backup_loop, daemon=True, name="daily-backup")
    daily_thread.start()
    # Set up pyinotify: watches are registered before entering the event loop.
    wm = pyinotify.WatchManager()
    handler = ChangeHandler()
    notifier = pyinotify.Notifier(wm, handler, timeout=5000)  # 5s poll timeout
    setup_watches(wm)
    write_status()
    logger.info("Daemon ready, entering main loop")
    try:
        while not shutdown_event.is_set():
            # Check for events (non-blocking with timeout); pyinotify requires this
            # exact check_events -> read_events -> process_events sequence.
            if notifier.check_events(timeout=5000):
                notifier.read_events()
                notifier.process_events()
            # Periodically re-check if config changed (dirs added/removed)
            with config_lock:
                enabled = current_config.get('enabled', False)
            if not enabled:
                # Backup disabled: throttle the loop; events are still consumed above.
                time.sleep(10)
                continue
    except KeyboardInterrupt:
        logger.info("Keyboard interrupt, shutting down")
    finally:
        # Always stop the notifier and publish a final status snapshot on exit.
        shutdown_event.set()
        notifier.stop()
        write_status()
        logger.info("Cloud Backup Sync Daemon stopped")
# Script entry point: run the daemon when executed directly.
if __name__ == "__main__":
    main()