1526 lines
59 KiB
Python
1526 lines
59 KiB
Python
"""
|
|
Cloud Backup Router
|
|
|
|
Manages cloud backup configuration (rclone-based sync to B2/S3/etc.):
|
|
- Config CRUD with sensitive field masking
|
|
- rclone.conf generation (preserves existing sections)
|
|
- Sync control (trigger, pause, resume)
|
|
- Status and log retrieval
|
|
- Connection testing
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import tempfile
|
|
import uuid
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from threading import Lock
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from fastapi import APIRouter, BackgroundTasks, Depends
|
|
from pydantic import BaseModel, Field
|
|
from slowapi import Limiter
|
|
from slowapi.util import get_remote_address
|
|
|
|
from ..core.dependencies import get_current_user, get_app_state
|
|
from modules.universal_logger import get_logger
|
|
|
|
# Module-wide logger for all cloud-backup operations.
logger = get_logger('CloudBackup')

# All endpoints in this module are served under /api/cloud-backup.
router = APIRouter(prefix="/api/cloud-backup", tags=["Cloud Backup"])

# Rate limiter keyed by client IP (slowapi).
limiter = Limiter(key_func=get_remote_address)
|
|
|
|
# ============================================================================
|
|
# CONSTANTS
|
|
# ============================================================================
|
|
|
|
# rclone configuration file; this module rewrites only its own sections.
RCLONE_CONF_PATH = Path("/root/.config/rclone/rclone.conf")

# Names of the two rclone.conf sections owned and regenerated by this module.
RCLONE_REMOTE_NAME = "cloud-backup-remote"
RCLONE_CRYPT_NAME = "cloud-backup-crypt"

# Files shared with the background sync daemon: status snapshot + manual trigger.
STATUS_FILE = Path("/tmp/cloud_backup_status.json")
TRIGGER_FILE = Path("/tmp/cloud_backup_trigger.json")

# Append-only sync activity log (also surfaced to the UI).
LOG_FILE = Path("/opt/media-downloader/logs/cloud_backup.log")

# mergerfs mount holding the directories eligible for backup.
IMMICH_BASE = Path("/opt/immich")

# Settings-DB key under which the backup configuration is persisted.
SETTINGS_KEY = "cloud_backup"

# systemd unit of the background sync daemon.
SERVICE_NAME = "cloud-backup-sync.service"

# Config fields whose values are never returned in plain text to clients.
SENSITIVE_FIELDS = {"key_id", "application_key", "encryption_password", "encryption_salt"}
MASK_VALUE = "****"

# Default include/exclude directory lists for new configurations.
DEFAULT_INCLUDE_DIRS = ["paid", "private", "md", "el", "elv", "ela", "upload", "review", "recycle", "db_dumps", "app_backup"]
DEFAULT_EXCLUDE_DIRS = ["lost+found", "db.old"]
|
|
|
|
# ============================================================================
|
|
# JOB TRACKING FOR BACKGROUND SYNC
|
|
# ============================================================================
|
|
|
|
# In-memory registry of manual sync jobs, keyed by job id.
_sync_jobs: Dict[str, Dict] = {}
# Guards all access to _sync_jobs (request handlers + background tasks share it).
_jobs_lock = Lock()
|
|
|
|
|
|
def _get_sync_job(job_id: str) -> Optional[Dict]:
    """Look up a tracked sync job by id; None when it is not tracked."""
    with _jobs_lock:
        found = _sync_jobs.get(job_id)
    return found
|
|
|
|
|
|
def _update_sync_job(job_id: str, updates: Dict):
    """Merge *updates* into an existing tracked job; silently ignore unknown ids."""
    with _jobs_lock:
        record = _sync_jobs.get(job_id)
        if record is not None:
            record.update(updates)
|
|
|
|
|
|
def _create_sync_job(job_id: str) -> Dict:
    """Register a fresh 'running' job record for *job_id* and return it."""
    record = {
        'id': job_id,
        'status': 'running',
        'started_at': datetime.now().isoformat(),
        'completed_at': None,
        'dirs_synced': 0,
        'dirs_total': 0,
        'current_dir': None,
        'current_file': None,
        'transfer_stats': None,
        'phase': 'preparing',
        'error': None,
    }
    with _jobs_lock:
        _sync_jobs[job_id] = record
        return _sync_jobs[job_id]
|
|
|
|
|
|
def _cleanup_old_sync_jobs():
    """Evict completed jobs older than one hour from the in-memory registry."""
    max_age_seconds = 3600
    with _jobs_lock:
        reference = datetime.now()
        stale = []
        for jid, record in _sync_jobs.items():
            finished = record.get('completed_at')
            if not finished:
                continue  # still running — never evict
            try:
                age = (reference - datetime.fromisoformat(finished)).total_seconds()
            except (ValueError, TypeError):
                continue  # unparseable timestamp — keep the record
            if age > max_age_seconds:
                stale.append(jid)
        for jid in stale:
            del _sync_jobs[jid]
|
|
|
|
|
|
# ============================================================================
|
|
# PYDANTIC MODELS
|
|
# ============================================================================
|
|
|
|
class CloudBackupConfigModel(BaseModel):
    """Full cloud-backup configuration as stored in the settings DB.

    An instance with all defaults is used as the fallback when no config
    has been saved yet (see _load_config).
    """
    enabled: bool = False
    provider: str = "b2"  # "b2" gets a native rclone b2 remote; anything else an S3-compatible one
    endpoint: str = ""
    bucket: str = ""
    key_id: str = ""  # sensitive — masked in API responses
    application_key: str = ""  # sensitive
    encryption_enabled: bool = True
    encryption_password: str = ""  # sensitive; fed through `rclone obscure`
    encryption_salt: str = ""  # sensitive; fed through `rclone obscure`
    include_dirs: List[str] = Field(default_factory=lambda: list(DEFAULT_INCLUDE_DIRS))
    exclude_dirs: List[str] = Field(default_factory=lambda: list(DEFAULT_EXCLUDE_DIRS))
    cooldown_seconds: int = 300  # presumably the daemon's debounce between syncs — confirm against daemon code
    bandwidth_limit: Optional[str] = None  # passed to rclone --bwlimit when set (e.g. "10M")
|
|
|
|
|
|
class CloudBackupConfigUpdate(BaseModel):
    """Partial update payload for PUT /config.

    None (or an unset field) means "leave the stored value unchanged".
    Sensitive fields may also carry the mask value ("****"), which the
    merge logic treats as "keep the existing secret".
    """
    enabled: Optional[bool] = None
    provider: Optional[str] = None
    endpoint: Optional[str] = None
    bucket: Optional[str] = None
    key_id: Optional[str] = None
    application_key: Optional[str] = None
    encryption_enabled: Optional[bool] = None
    encryption_password: Optional[str] = None
    encryption_salt: Optional[str] = None
    include_dirs: Optional[List[str]] = None
    exclude_dirs: Optional[List[str]] = None
    cooldown_seconds: Optional[int] = None
    bandwidth_limit: Optional[str] = None
|
|
|
|
|
|
# ============================================================================
|
|
# HELPER FUNCTIONS
|
|
# ============================================================================
|
|
|
|
def _get_settings_manager():
    """Fetch the SettingsManager held by the global AppState."""
    return get_app_state().settings
|
|
|
|
|
|
def _load_config() -> dict:
    """Return the persisted cloud-backup config, or model defaults if unset."""
    saved = _get_settings_manager().get(SETTINGS_KEY)
    if isinstance(saved, dict) and saved:
        return saved
    return CloudBackupConfigModel().model_dump()
|
|
|
|
|
|
def _save_config(config: dict):
    """Persist the cloud-backup config dict to the settings DB."""
    manager = _get_settings_manager()
    manager.set(
        SETTINGS_KEY,
        config,
        category='cloud_backup',
        description='Cloud backup configuration',
    )
|
|
|
|
|
|
def _mask_config(config: dict) -> dict:
    """Copy *config* with secret values masked.

    Non-empty secrets become the mask token; empty or missing secrets are
    normalized to None (missing sensitive keys are added as None).
    """
    shielded = dict(config)
    for name in SENSITIVE_FIELDS:
        secret = shielded.get(name)
        if not secret:
            shielded[name] = None
        elif secret != MASK_VALUE:
            shielded[name] = MASK_VALUE
    return shielded
|
|
|
|
|
|
def _merge_config_update(existing: dict, update: dict) -> dict:
    """Overlay *update* onto *existing*.

    None values are skipped, and a sensitive field carrying the mask token
    keeps its previously stored secret.
    """
    result = dict(existing)
    for field, incoming in update.items():
        if incoming is None:
            continue
        keep_secret = field in SENSITIVE_FIELDS and incoming == MASK_VALUE
        if not keep_secret:
            result[field] = incoming
    return result
|
|
|
|
|
|
def _rclone_obscure(password: str) -> str:
|
|
"""Obscure a password using rclone obscure."""
|
|
if not password:
|
|
return ""
|
|
result = subprocess.run(
|
|
["rclone", "obscure", password],
|
|
capture_output=True, text=True, timeout=10
|
|
)
|
|
if result.returncode != 0:
|
|
raise RuntimeError(f"rclone obscure failed: {result.stderr.strip()}")
|
|
return result.stdout.strip()
|
|
|
|
|
|
def _regenerate_rclone_config(config: dict):
    """
    Regenerate rclone.conf preserving all existing sections
    except cloud-backup-remote and cloud-backup-crypt.

    The file is parsed section-by-section, our two managed sections are
    replaced from *config*, and the result is written atomically via a
    temp file in the same directory (mode 0600 — it holds credentials).

    Raises: RuntimeError from _rclone_obscure on password obscuring
    failure; any write error is re-raised after removing the temp file.
    """
    RCLONE_CONF_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Read existing config
    existing_content = ""
    if RCLONE_CONF_PATH.exists():
        existing_content = RCLONE_CONF_PATH.read_text()

    # Parse existing sections (preserve all except our managed ones)
    sections = {}
    current_section = None
    current_lines = []

    for line in existing_content.splitlines():
        section_match = re.match(r'^\[(.+)\]$', line.strip())
        if section_match:
            # New [section] header — flush the previous section's lines first.
            if current_section is not None:
                sections[current_section] = '\n'.join(current_lines)
            current_section = section_match.group(1)
            current_lines = [line]
        elif current_section is not None:
            current_lines.append(line)
        # Lines before any section are ignored (shouldn't exist in rclone.conf)

    if current_section is not None:
        sections[current_section] = '\n'.join(current_lines)

    # Remove our managed sections
    sections.pop(RCLONE_REMOTE_NAME, None)
    sections.pop(RCLONE_CRYPT_NAME, None)

    # Build new sections
    provider = config.get('provider', 'b2')
    new_sections = []

    if provider == 'b2':
        # Native Backblaze B2 remote.
        new_sections.append(f"""[{RCLONE_REMOTE_NAME}]
type = b2
account = {config.get('key_id', '')}
key = {config.get('application_key', '')}
endpoint = {config.get('endpoint', '')}""")
    else:
        # S3-compatible fallback
        new_sections.append(f"""[{RCLONE_REMOTE_NAME}]
type = s3
provider = Other
access_key_id = {config.get('key_id', '')}
secret_access_key = {config.get('application_key', '')}
endpoint = {config.get('endpoint', '')}""")

    if config.get('encryption_enabled', True):
        # Crypt remote layered on top of the base remote; the bucket is baked
        # into the crypt remote's target, so syncs address it as crypt:<dir>.
        enc_pass = _rclone_obscure(config.get('encryption_password', ''))
        enc_salt = _rclone_obscure(config.get('encryption_salt', ''))
        bucket = config.get('bucket', '')
        new_sections.append(f"""[{RCLONE_CRYPT_NAME}]
type = crypt
remote = {RCLONE_REMOTE_NAME}:{bucket}
password = {enc_pass}
password2 = {enc_salt}
filename_encryption = standard
directory_name_encryption = true""")

    # Write config atomically
    output_parts = []
    for section_name, section_content in sections.items():
        output_parts.append(section_content)
    for section in new_sections:
        output_parts.append(section)

    final_content = '\n\n'.join(output_parts) + '\n'

    # Temp file lives in the destination directory so os.rename is atomic.
    fd, tmp_path = tempfile.mkstemp(dir=str(RCLONE_CONF_PATH.parent), suffix='.tmp')
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(final_content)
        os.chmod(tmp_path, 0o600)  # credentials inside — owner read/write only
        os.rename(tmp_path, str(RCLONE_CONF_PATH))
        logger.info("rclone.conf regenerated successfully")
    except Exception:
        # Best-effort cleanup of the temp file, then propagate the error.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
|
|
def _read_status_file() -> dict:
    """Parse the daemon's status JSON file; returns {} on any read/parse failure."""
    try:
        if not STATUS_FILE.exists():
            return {}
        return json.loads(STATUS_FILE.read_text())
    except (json.JSONDecodeError, OSError) as exc:
        logger.warning(f"Failed to read status file: {exc}")
        return {}
|
|
|
|
|
|
def _get_service_status() -> str:
|
|
"""Check systemd service status."""
|
|
try:
|
|
result = subprocess.run(
|
|
["systemctl", "is-active", SERVICE_NAME],
|
|
capture_output=True, text=True, timeout=5
|
|
)
|
|
return result.stdout.strip() # active, inactive, failed, etc.
|
|
except Exception:
|
|
return "unknown"
|
|
|
|
|
|
def _find_ssd_mount() -> Optional[Path]:
    """Auto-detect the SSD underlying a mergerfs mount at IMMICH_BASE.

    Reads /proc/mounts to find the mergerfs entry, resolves drive names
    to mount paths, then checks each drive's rotational flag (ROTA=0 = SSD).

    Returns the mount path of the first non-rotational member drive, or
    None when nothing matches or any step fails (best-effort by design).
    """
    try:
        # Build a map of all mount points: mountpoint -> (device, ...)
        mounts = {}
        with open('/proc/mounts') as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 2 and parts[0].startswith('/dev/'):
                    mounts[parts[1]] = parts[0]

        with open('/proc/mounts') as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 3 and parts[1] == str(IMMICH_BASE) and 'mergerfs' in parts[2]:
                    # Source can be "samsung2tb:onetouch4tb" or "/mnt/a:/mnt/b"
                    drive_names = parts[0].split(':')
                    for name in drive_names:
                        name = name.strip()
                        if not name:
                            continue
                        # Try as-is first (absolute path), then /mnt/<name>
                        candidates = [name, f'/mnt/{name}']
                        for drive_path in candidates:
                            if drive_path in mounts:
                                dev = mounts[drive_path]
                                # Strip trailing partition digits: /dev/sda1 -> /dev/sda.
                                # NOTE(review): this mangles NVMe names (/dev/nvme0n1p1
                                # -> /dev/nvme) — confirm only SATA-style devices occur here.
                                dev_base = dev.rstrip('0123456789')
                                dev_name = dev_base.split('/')[-1]
                                rotational = Path(f'/sys/block/{dev_name}/queue/rotational')
                                # "0" in the rotational flag means non-rotational, i.e. SSD.
                                if rotational.exists() and rotational.read_text().strip() == '0':
                                    return Path(drive_path)
    except Exception:
        pass
    return None
|
|
|
|
|
|
def _get_fast_storage_base() -> Path:
    """Pick the fastest storage path for dumps.

    Prefers the SSD member of the mergerfs pool when one is detected,
    otherwise falls back to the mergerfs path itself. Dirs created here
    are visible via mergerfs without any cross-filesystem copies.
    """
    return _find_ssd_mount() or IMMICH_BASE
|
|
|
|
|
|
# Resolved once at import time: fastest available base for dump output.
_FAST_BASE = _get_fast_storage_base()
DB_DUMPS_DIR = _FAST_BASE / "db_dumps"      # PostgreSQL dump destination
APP_BACKUP_DIR = _FAST_BASE / "app_backup"  # app archive + config copies

# Systemd service files to back up
SYSTEMD_FILES = [
    "media-downloader.service",
    "media-downloader-api.service",
    "media-downloader-frontend.service",
    "media-downloader-db-cleanup.service",
    "media-downloader-db-cleanup.timer",
    "xvfb-media-downloader.service",
    "cloud-backup-sync.service",
]
|
|
|
|
|
|
def _run_pre_sync_dumps(job_id: str = None, include_immich_db: bool = True, include_app_archive: bool = True):
    """Dump databases and archive app + system configs before syncing.

    Args:
        job_id: Optional in-memory sync-job id; when set, progress text is
            pushed into that job's 'current_dir' field for the frontend.
        include_immich_db: If False, skip Immich DB dump (only needed daily).
        include_app_archive: If False, skip app tar.gz + system configs (only needed daily).

    Returns:
        list of error strings (empty on full success). Every step is
        best-effort: failures are collected, never raised.
    """
    DB_DUMPS_DIR.mkdir(parents=True, exist_ok=True)
    APP_BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    errors = []

    import shutil
    import time as _time

    def _status(msg):
        # Mirror progress text into the tracked job (no-op without job_id).
        if job_id:
            _update_sync_job(job_id, {'current_dir': msg})

    def _get_dir_size(path):
        """Get total size of a directory in bytes."""
        try:
            return sum(f.stat().st_size for f in Path(path).rglob('*') if f.is_file())
        except OSError:
            return 0

    def _get_docker_dir_size(container, path):
        """Get size of a directory inside a docker container."""
        try:
            r = subprocess.run(
                ["docker", "exec", container, "du", "-sb", path],
                capture_output=True, text=True, timeout=5
            )
            if r.returncode == 0:
                return int(r.stdout.split()[0])
        except Exception:
            pass
        return 0

    def _run_with_progress(cmd, label, total_bytes, size_fn, env=None):
        """Run a subprocess while polling output size for progress updates."""
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
        while proc.poll() is None:
            _time.sleep(3)
            current = size_fn()
            if total_bytes > 0:
                # Cap at 99 so 100% is only ever reported on actual completion.
                pct = min(99, int(current / total_bytes * 100))
                _status(f"{label} ({pct}%)")
        stdout, stderr = proc.communicate()
        return proc.returncode, stderr.decode() if stderr else ""

    # ── 1. Database dumps ──────────────────────────────────────────────

    # Immich PostgreSQL (Docker container) — only during daily/full backup
    if include_immich_db:
        try:
            _status("Dumping Immich database (0%)...")
            logger.info("Dumping Immich database...")
            # Get DB size for progress tracking
            sz_result = subprocess.run(
                ["docker", "exec", "immich_postgres",
                 "psql", "-U", "postgres", "-d", "immich", "-tAc",
                 "SELECT pg_database_size('immich')"],
                capture_output=True, text=True, timeout=10
            )
            immich_db_size = int(sz_result.stdout.strip()) if sz_result.returncode == 0 else 0

            # Clean previous dump dir
            subprocess.run(
                ["docker", "exec", "immich_postgres", "rm", "-rf", "/tmp/immich_dump"],
                capture_output=True, text=True, timeout=10
            )

            # Parallel directory-format dump; vector tables are excluded
            # (face_search/smart_search — presumably regenerable by Immich; confirm).
            rc, stderr = _run_with_progress(
                ["docker", "exec", "immich_postgres",
                 "pg_dump", "-U", "postgres", "-d", "immich",
                 "--no-owner", "--no-acl", "-Fd", "-j", "4",
                 "--exclude-table-data=face_search",
                 "--exclude-table-data=smart_search",
                 "-f", "/tmp/immich_dump"],
                "Dumping Immich DB",
                immich_db_size,
                lambda: _get_docker_dir_size("immich_postgres", "/tmp/immich_dump"),
            )
            if rc == 0:
                _status("Copying Immich dump...")
                # Tar the dump dir inside the container, then copy it out.
                subprocess.run(
                    ["docker", "exec", "immich_postgres",
                     "tar", "cf", "/tmp/immich_dump.tar", "-C", "/tmp", "immich_dump"],
                    capture_output=True, text=True, timeout=120
                )
                # docker cp directly to SSD-backed DB_DUMPS_DIR (no cross-fs move needed)
                final_dest = str(DB_DUMPS_DIR / "immich_dump.tar")
                subprocess.run(
                    ["docker", "cp", "immich_postgres:/tmp/immich_dump.tar", final_dest],
                    capture_output=True, text=True, timeout=120
                )
                # Clean up the container-side temp files.
                subprocess.run(
                    ["docker", "exec", "immich_postgres",
                     "sh", "-c", "rm -rf /tmp/immich_dump /tmp/immich_dump.tar"],
                    capture_output=True, text=True, timeout=10
                )
                size_mb = Path(final_dest).stat().st_size / 1e6
                logger.info(f"Immich DB dump: {size_mb:.1f} MB")
            else:
                errors.append(f"Immich pg_dump failed: {stderr[:200]}")
                logger.error(f"Immich pg_dump failed: {stderr[:200]}")
        except Exception as e:
            errors.append(f"Immich pg_dump error: {e}")
            logger.error(f"Immich pg_dump error: {e}")

    # Media Downloader PostgreSQL (local) — parallel directory-format dump
    # Uses --lock-wait-timeout to avoid hanging when scheduler holds locks,
    # with a serial (-j 1) retry if parallel dump fails on lock contention.
    try:
        _status("Dumping Media Downloader DB (0%)...")
        logger.info("Dumping Media Downloader database...")
        # Get DB size
        env = os.environ.copy()
        # NOTE(review): hardcoded DB credential committed to source — should
        # be loaded from config/env instead.
        env["PGPASSWORD"] = "PNsihOXvvuPwWiIvGlsc9Fh2YmMmB"
        sz_result = subprocess.run(
            ["psql", "-h", "localhost", "-U", "media_downloader",
             "-d", "media_downloader", "-tAc",
             "SELECT pg_database_size('media_downloader')"],
            capture_output=True, text=True, timeout=10, env=env
        )
        md_db_size = int(sz_result.stdout.strip()) if sz_result.returncode == 0 else 0

        # Dump directly to destination (cross-fs move is slower than direct write)
        final_dump_dir = str(DB_DUMPS_DIR / "media_downloader_dump")
        if Path(final_dump_dir).exists():
            shutil.rmtree(final_dump_dir)

        base_cmd = [
            "pg_dump", "-h", "localhost", "-U", "media_downloader",
            "-d", "media_downloader", "--no-owner", "--no-acl",
            "--exclude-table-data=thumbnails",
            "--lock-wait-timeout=30000",
        ]
        rc, stderr = _run_with_progress(
            base_cmd + ["-Fd", "-j", "4", "-f", final_dump_dir],
            "Dumping Media Downloader DB",
            md_db_size,
            lambda: _get_dir_size(final_dump_dir),
            env=env,
        )
        if rc != 0 and "could not obtain lock" in stderr:
            # Lock contention — retry serial after a short wait
            logger.warning("Parallel pg_dump hit lock contention, retrying serial in 15s...")
            _status("DB lock contention, retrying...")
            if Path(final_dump_dir).exists():
                shutil.rmtree(final_dump_dir)
            import time as _time2
            _time2.sleep(15)
            rc, stderr = _run_with_progress(
                base_cmd + ["-Fd", "-j", "1", "-f", final_dump_dir],
                "Dumping Media Downloader DB (retry)",
                md_db_size,
                lambda: _get_dir_size(final_dump_dir),
                env=env,
            )
        if rc == 0:
            total = _get_dir_size(final_dump_dir)
            logger.info(f"Media Downloader DB dump: {total / 1e6:.1f} MB")
        else:
            errors.append(f"Media Downloader pg_dump failed: {stderr[:200]}")
            logger.error(f"Media Downloader pg_dump failed: {stderr[:200]}")
    except Exception as e:
        errors.append(f"Media Downloader pg_dump error: {e}")
        logger.error(f"Media Downloader pg_dump error: {e}")

    # ── 2-6: App archive + system configs (daily only) ─────────────────

    if include_app_archive:
        try:
            _status("Archiving media-downloader app...")
            logger.info("Archiving media-downloader app...")
            # Write directly to SSD-backed APP_BACKUP_DIR (no cross-fs move)
            final_archive = APP_BACKUP_DIR / "media-downloader-app.tar.gz"
            result = subprocess.run(
                ["tar", "czf", str(final_archive),
                 "--exclude=./venv",
                 "--exclude=./web/frontend/node_modules",
                 "--exclude=./logs",
                 "--exclude=./cache/thumbnails",
                 "--exclude=./__pycache__",
                 "--exclude=./.git",
                 "--exclude=./temp",
                 "-C", "/opt", "media-downloader"],
                capture_output=True, text=True, timeout=600
            )
            if result.returncode == 0:
                size_mb = final_archive.stat().st_size / 1e6
                logger.info(f"App archive: {size_mb:.1f} MB")
            else:
                errors.append(f"App archive failed: {result.stderr[:200]}")
                logger.error(f"App archive failed: {result.stderr[:200]}")
        except Exception as e:
            errors.append(f"App archive error: {e}")
            logger.error(f"App archive error: {e}")

        # Copy the systemd unit files listed in SYSTEMD_FILES.
        try:
            services_dir = APP_BACKUP_DIR / "systemd"
            services_dir.mkdir(parents=True, exist_ok=True)
            copied = 0
            for svc in SYSTEMD_FILES:
                src = Path(f"/etc/systemd/system/{svc}")
                if src.exists():
                    (services_dir / svc).write_text(src.read_text())
                    copied += 1
            logger.info(f"Copied {copied} systemd service files")
        except Exception as e:
            errors.append(f"Systemd backup error: {e}")
            logger.error(f"Systemd backup error: {e}")

        # Copy the rclone configuration itself.
        try:
            if RCLONE_CONF_PATH.exists():
                (APP_BACKUP_DIR / "rclone.conf").write_text(RCLONE_CONF_PATH.read_text())
                logger.info("Copied rclone.conf")
        except Exception as e:
            errors.append(f"rclone config backup error: {e}")
            logger.error(f"rclone config backup error: {e}")

        # Copy Immich's docker-compose.yml and .env.
        try:
            compose_src = IMMICH_BASE / "docker-compose.yml"
            if compose_src.exists():
                (APP_BACKUP_DIR / "immich-docker-compose.yml").write_text(compose_src.read_text())
            env_src = IMMICH_BASE / ".env"
            if env_src.exists():
                (APP_BACKUP_DIR / "immich-env").write_text(env_src.read_text())
            logger.info("Copied Immich docker-compose + .env")
        except Exception as e:
            errors.append(f"Immich compose backup error: {e}")
            logger.error(f"Immich compose backup error: {e}")

        # Include the restore script so the backup bucket is self-describing.
        try:
            restore_src = Path("/opt/media-downloader/scripts/cloud_backup_restore.sh")
            if restore_src.exists():
                (APP_BACKUP_DIR / "RESTORE.sh").write_text(restore_src.read_text())
                logger.info("Copied restore script as RESTORE.sh")
        except Exception as e:
            errors.append(f"Restore script backup error: {e}")
            logger.error(f"Restore script backup error: {e}")

    return errors
|
|
|
|
|
|
def _run_full_backup(config: dict, job_id: str):
    """Manual-sync entry point: quick DB dump, then rclone file sync.

    Immich DB + app archive are daily-only (handled by daemon at 3 AM).
    """
    pre_errors = _run_pre_sync_dumps(
        job_id=job_id,
        include_immich_db=False,
        include_app_archive=False,
    )
    _run_file_sync(config, job_id, extra_errors=pre_errors)
|
|
|
|
|
|
def _parse_rclone_stats(line: str) -> Optional[Dict]:
|
|
"""Parse an rclone --stats-one-line output line into structured data.
|
|
|
|
Actual rclone format:
|
|
INFO : 1.424 GiB / 1.424 GiB, 100%, 0 B/s, ETA -
|
|
INFO : 1.424 GiB / 1.424 GiB, 100%, 10.5 MiB/s, ETA 7m30s, Checks: 100, Transferred: 15 / 45
|
|
"""
|
|
stats = {}
|
|
try:
|
|
# Extract: bytes_done / bytes_total, pct%, speed
|
|
m = re.search(r'([\d.]+\s*\S*?B)\s*/\s*([\d.]+\s*\S*?B),\s*(\d+)%,\s*([\d.]+\s*\S*?/s)', line)
|
|
if m:
|
|
stats['bytes_done'] = m.group(1).strip()
|
|
stats['bytes_total'] = m.group(2).strip()
|
|
stats['pct'] = int(m.group(3))
|
|
stats['speed'] = m.group(4).strip()
|
|
# Extract ETA
|
|
m_eta = re.search(r'ETA\s+([\w.]+)', line)
|
|
if m_eta and m_eta.group(1) != '-':
|
|
stats['eta'] = m_eta.group(1)
|
|
# Extract file count: "Transferred: 15 / 45" at end of one-line stats
|
|
m_files = re.search(r'Transferred:\s*(\d+)\s*/\s*(\d+)', line)
|
|
if m_files:
|
|
stats['files_done'] = int(m_files.group(1))
|
|
stats['files_total'] = int(m_files.group(2))
|
|
# Extract checks count — "Checks: 100" or "(chk#573/582)"
|
|
m_checks = re.search(r'Checks:\s*(\d+)', line)
|
|
if m_checks:
|
|
stats['checks_done'] = int(m_checks.group(1))
|
|
m_chk = re.search(r'chk#(\d+)/(\d+)', line)
|
|
if m_chk:
|
|
stats['checks_done'] = int(m_chk.group(1))
|
|
stats['checks_total'] = int(m_chk.group(2))
|
|
except (ValueError, AttributeError):
|
|
pass
|
|
return stats if stats else None
|
|
|
|
|
|
def _run_file_sync(config: dict, job_id: str, extra_errors: list = None):
    """Run rclone sync for all included directories with live transfer stats.

    Streams rclone's stderr line by line: a cleaned copy goes to LOG_FILE,
    parsed progress feeds the in-memory job for frontend polling. When all
    dirs are done, the job is marked completed, the daemon status file is
    updated, and a cloud-storage stats refresh is triggered.

    Args:
        config: cloud-backup config dict (include_dirs, encryption, bucket, ...).
        job_id: id of the job record created by _create_sync_job.
        extra_errors: pre-existing errors (e.g. from dumps) carried into
            the final job status.
    """
    include_dirs = config.get('include_dirs', DEFAULT_INCLUDE_DIRS)
    encryption_enabled = config.get('encryption_enabled', True)
    bandwidth_limit = config.get('bandwidth_limit')
    bucket = config.get('bucket', '')

    # Only sync include_dirs that actually exist under the mergerfs mount.
    dirs_to_sync = [d for d in include_dirs if (IMMICH_BASE / d).is_dir()]
    _update_sync_job(job_id, {'dirs_total': len(dirs_to_sync), 'transfer_stats': None})

    errors = list(extra_errors) if extra_errors else []
    for i, dir_name in enumerate(dirs_to_sync):
        _update_sync_job(job_id, {
            'current_dir': dir_name, 'dirs_synced': i,
            'transfer_stats': None, 'current_file': None,
            'phase': 'checking',
        })
        src = str(IMMICH_BASE / dir_name)
        # Encrypted syncs go through the crypt remote (bucket is baked into
        # its config); plain syncs address the bucket on the base remote.
        if encryption_enabled:
            dest = f"{RCLONE_CRYPT_NAME}:{dir_name}"
        else:
            dest = f"{RCLONE_REMOTE_NAME}:{bucket}/{dir_name}"

        cmd = [
            "rclone", "sync", src, dest,
            "--config", str(RCLONE_CONF_PATH),
            "--stats", "3s",
            "--stats-one-line",
            "-v",
            "--transfers", "4",
            "--checkers", "16",
            "--fast-list",
        ]
        if bandwidth_limit:
            cmd.extend(["--bwlimit", bandwidth_limit])

        try:
            # NOTE(review): stdout is piped but never drained; with -v rclone
            # logs to stderr so this appears safe in practice — confirm.
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            log_fh = open(str(LOG_FILE), 'a')
            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            log_fh.write(f"\n[{ts}] Syncing {dir_name}/ ({i+1}/{len(dirs_to_sync)})\n")
            log_fh.flush()
            try:
                for line in proc.stderr:
                    stripped = line.strip()
                    if not stripped:
                        continue

                    # Clean rclone output: strip syslog prefix "<6>INFO :" → clean message
                    clean = re.sub(r'^<\d+>', '', stripped)  # strip syslog priority
                    clean = re.sub(r'^(INFO|DEBUG|ERROR|NOTICE)\s*:\s*', '', clean).strip()

                    # Detect progress/stats lines (with % or chk# markers)
                    is_progress = ('%,' in stripped and '/s' in stripped) or 'chk#' in stripped

                    # Write cleaned output to log file
                    if is_progress:
                        # Progress line — only log the latest (overwrite style)
                        log_fh.write(f" [progress] {clean}\n")
                    elif ': Copied' in stripped or ': Moved' in stripped:
                        # Per-file transfer line — shorten the path to its basename.
                        m = re.search(r'(.+?):\s*(Copied|Moved)\s*(.*)', clean)
                        if m:
                            fname = m.group(1).strip()
                            action = m.group(2)
                            detail = m.group(3).strip()
                            short = fname.split('/')[-1] if '/' in fname else fname
                            log_fh.write(f" {action}: {short} {detail}\n")
                        else:
                            log_fh.write(f" {clean}\n")
                    elif 'Checks:' in clean and 'Transferred:' in clean:
                        # Summary line at end of dir sync
                        log_fh.write(f" {clean}\n")
                    elif clean:
                        log_fh.write(f" {clean}\n")
                    log_fh.flush()

                    # Update live stats for frontend
                    if is_progress:
                        stats = _parse_rclone_stats(stripped)
                        if stats:
                            # Determine phase from stats: any file activity or
                            # nonzero percentage means we are transferring.
                            phase = 'checking'
                            if stats.get('files_done', 0) > 0 or stats.get('files_total', 0) > 0:
                                phase = 'transferring'
                            elif stats.get('pct', 0) > 0:
                                phase = 'transferring'
                            _update_sync_job(job_id, {'transfer_stats': stats, 'phase': phase})

                    # Track the most recently transferred file name.
                    if ': Copied' in stripped or ': Moved' in stripped:
                        m = re.search(r'INFO\s*:\s*(.+?):\s*(?:Copied|Moved)', stripped)
                        if m:
                            fname = m.group(1).strip()
                            short = fname.split('/')[-1] if '/' in fname else fname
                            _update_sync_job(job_id, {'current_file': short, 'phase': 'transferring'})
            finally:
                log_fh.close()

            proc.wait()
            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            if proc.returncode != 0:
                err_msg = f"Sync failed for {dir_name}: exit code {proc.returncode}"
                errors.append(err_msg)
                logger.error(err_msg)
                with open(str(LOG_FILE), 'a') as f:
                    f.write(f"[{ts}] ERROR: {dir_name}/ failed (exit {proc.returncode})\n")
            else:
                with open(str(LOG_FILE), 'a') as f:
                    f.write(f"[{ts}] Completed {dir_name}/\n")
        except Exception as e:
            errors.append(f"Sync error for {dir_name}: {e}")
            logger.error(f"Sync error for {dir_name}: {e}")

    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    status_msg = 'completed' if not errors else f'completed with {len(errors)} error(s)'
    with open(str(LOG_FILE), 'a') as f:
        f.write(f"\n[{ts}] Sync {status_msg} — {len(dirs_to_sync)} directories\n{'='*60}\n")

    completed_status = 'completed' if not errors else 'completed_with_errors'
    _update_sync_job(job_id, {
        'status': completed_status,
        'completed_at': datetime.now().isoformat(),
        'dirs_synced': len(dirs_to_sync),
        'current_dir': None,
        'error': '; '.join(errors) if errors else None,
    })

    # Write status file for daemon/dashboard
    _write_manual_sync_status(completed_status, errors)
    _cleanup_old_sync_jobs()

    # Auto-refresh cloud storage stats after sync
    _trigger_cloud_status_refresh()
|
|
|
|
|
|
def _write_manual_sync_status(status: str, errors: list):
    """Record manual-sync completion in the shared status file (best-effort)."""
    try:
        payload = _read_status_file()
        payload['last_manual_sync'] = datetime.now().isoformat()
        payload['last_manual_sync_status'] = status
        if errors:
            payload['last_manual_sync_errors'] = errors
        STATUS_FILE.write_text(json.dumps(payload))
    except OSError as exc:
        logger.warning(f"Failed to write status file: {exc}")
|
|
|
|
|
|
# ============================================================================
|
|
# API ENDPOINTS
|
|
# ============================================================================
|
|
|
|
@router.get("/config")
async def get_config(user=Depends(get_current_user)):
    """Get cloud backup configuration (sensitive fields masked)."""
    return _mask_config(_load_config())
|
|
|
|
|
|
@router.put("/config")
async def update_config(update: CloudBackupConfigUpdate, user=Depends(get_current_user)):
    """Save cloud backup configuration and regenerate rclone.conf.

    Masked ("****") sensitive values keep the previously stored secrets.
    When enabling the backup, required fields are validated and a 400 is
    raised if any are missing. rclone.conf is regenerated whenever
    credentials are present; a regeneration failure is reported in the
    response body instead of failing the save.
    """
    existing = _load_config()
    update_dict = update.model_dump(exclude_unset=True)
    merged = _merge_config_update(existing, update_dict)

    # Validate required fields if enabling — table-driven instead of a
    # repeated if-chain; order matches the user-facing error message.
    if merged.get('enabled'):
        required = ['endpoint', 'bucket', 'key_id', 'application_key']
        if merged.get('encryption_enabled'):
            required += ['encryption_password', 'encryption_salt']
        missing = [name for name in required if not merged.get(name)]
        if missing:
            from fastapi import HTTPException
            raise HTTPException(status_code=400, detail=f"Missing required fields: {', '.join(missing)}")

    _save_config(merged)

    # Regenerate rclone config if we have credentials
    if merged.get('key_id') and merged.get('application_key'):
        try:
            _regenerate_rclone_config(merged)
        except Exception as e:
            logger.error(f"Failed to regenerate rclone config: {e}")
            return {"status": "saved", "rclone_config": "failed", "error": str(e), "config": _mask_config(merged)}

    return {"status": "saved", "rclone_config": "updated", "config": _mask_config(merged)}
|
|
|
|
|
|
@router.get("/status")
async def get_status(user=Depends(get_current_user)):
    """Get sync status including service state, last sync time, storage info.

    Reads job progress from the daemon's status file. When the daemon is
    running a manual sync (has job_id), its progress fields take priority
    over in-memory job tracking.
    """
    config = _load_config()
    service_status = _get_service_status()
    daemon_status = _read_status_file()

    active_job = None
    daemon_job_id = daemon_status.get('job_id')

    # Priority 1: Daemon is actively syncing a job — build active_job from daemon progress
    if daemon_job_id and daemon_status.get('state') == 'syncing':
        active_job = {
            'id': daemon_job_id,
            'status': 'running',
            'started_at': daemon_status.get('started_at'),
            'completed_at': None,
            'dirs_synced': daemon_status.get('dirs_synced', 0),
            'dirs_total': daemon_status.get('dirs_total', 0),
            'current_dir': daemon_status.get('current_dir'),
            'current_file': daemon_status.get('current_file'),
            'transfer_stats': daemon_status.get('transfer_stats'),
            'phase': daemon_status.get('phase', 'syncing'),
            'error': None,
        }
        # Also update in-memory job to stay in sync (only if we are tracking it)
        if _get_sync_job(daemon_job_id):
            _update_sync_job(daemon_job_id, {
                'dirs_synced': daemon_status.get('dirs_synced', 0),
                'dirs_total': daemon_status.get('dirs_total', 0),
                'current_dir': daemon_status.get('current_dir'),
                'current_file': daemon_status.get('current_file'),
                'transfer_stats': daemon_status.get('transfer_stats'),
                'phase': daemon_status.get('phase', 'syncing'),
            })

    # Priority 2: Daemon has a completed job — update in-memory job.
    # Only transitions a still-'running' in-memory job, so this runs once per job.
    elif daemon_job_id and daemon_status.get('completed_at'):
        in_mem = _get_sync_job(daemon_job_id)
        if in_mem and in_mem.get('status') == 'running':
            # 'completed_with_errors' when the daemon recorded a sync_error
            completed_status = 'completed' if not daemon_status.get('sync_error') else 'completed_with_errors'
            _update_sync_job(daemon_job_id, {
                'status': completed_status,
                'completed_at': daemon_status.get('completed_at'),
                'dirs_synced': daemon_status.get('dirs_synced', 0),
                'dirs_total': daemon_status.get('dirs_total', 0),
                'error': daemon_status.get('sync_error'),
                'phase': 'completed',
                'current_dir': None,
                'current_file': None,
                'transfer_stats': None,
            })
            # Sync just finished in daemon — refresh cloud storage stats
            _trigger_cloud_status_refresh()

    # Priority 3: Fall back to in-memory jobs (legacy / pending trigger)
    if active_job is None:
        with _jobs_lock:
            for job in _sync_jobs.values():
                if job['status'] == 'running':
                    active_job = dict(job)
                    break

    # Determine effective state
    is_syncing = active_job is not None or daemon_status.get('state') == 'syncing'
    if active_job is not None:
        effective_state = 'syncing'
    elif daemon_status.get('state'):
        effective_state = daemon_status['state']
    elif service_status == 'active':
        effective_state = 'idle'
    else:
        # Service inactive and no daemon state recorded — also reported as idle
        effective_state = 'idle'

    return {
        "configured": bool(config.get('key_id') and config.get('bucket')),
        "enabled": config.get('enabled', False),
        "service_status": service_status,
        "syncing": is_syncing,
        # New-style keys win; fall back to the older last_manual_* keys
        "last_sync": daemon_status.get('last_sync', daemon_status.get('last_manual_sync')),
        "last_sync_status": daemon_status.get('last_sync_status', daemon_status.get('last_manual_sync_status')),
        "files_watched": daemon_status.get('files_watched', 0),
        "storage_used": daemon_status.get('storage_used'),
        "error_count": daemon_status.get('error_count', 0),
        "last_error": daemon_status.get('last_error'),
        "cooldown_remaining": daemon_status.get('cooldown_remaining', 0),
        "active_job": active_job,
        "state": effective_state,
    }
|
|
|
|
|
|
@router.post("/sync")
async def trigger_sync(user=Depends(get_current_user)):
    """Trigger a manual sync by delegating to the daemon via trigger file.

    The sync runs inside the daemon process (cloud-backup-sync.service),
    so it survives API/scheduler restarts. Progress is tracked via the
    daemon's status file.
    """
    from fastapi import HTTPException

    cfg = _load_config()
    if not (cfg.get('key_id') and cfg.get('bucket')):
        raise HTTPException(status_code=400, detail="Cloud backup not configured")

    # Guard 1: daemon must be running to pick up the trigger
    svc = _get_service_status()
    if svc != "active":
        raise HTTPException(status_code=503, detail=f"Cloud backup daemon is not running (status: {svc})")

    # Guard 2: daemon must not already be mid-sync (per its status file)
    if _read_status_file().get('state') == 'syncing':
        raise HTTPException(status_code=409, detail="A sync is already in progress")

    # Guard 3: no unconsumed trigger file
    if TRIGGER_FILE.exists():
        raise HTTPException(status_code=409, detail="A sync trigger is already pending")

    # Guard 4: no in-memory running job (race condition guard)
    with _jobs_lock:
        already_running = any(j['status'] == 'running' for j in _sync_jobs.values())
    if already_running:
        raise HTTPException(status_code=409, detail="A sync is already in progress")

    job_id = str(uuid.uuid4())

    # Hand the request to the daemon via the trigger file
    payload = {
        "type": "sync",
        "job_id": job_id,
        "requested_at": datetime.now().isoformat(),
        "requested_by": "api",
    }
    try:
        TRIGGER_FILE.write_text(json.dumps(payload))
    except OSError as e:
        raise HTTPException(status_code=500, detail=f"Failed to write trigger file: {e}")

    # Track the job in memory so status queries reflect it immediately
    _create_sync_job(job_id)

    return {"status": "started", "job_id": job_id}
|
|
|
|
|
|
@router.post("/pause")
async def pause_sync(user=Depends(get_current_user)):
    """Stop the cloud backup sync service."""
    try:
        proc = subprocess.run(
            ["systemctl", "stop", SERVICE_NAME],
            capture_output=True, text=True, timeout=30,
        )
    except Exception as e:
        logger.error(f"Failed to stop sync service: {e}")
        return {"status": "error", "message": str(e)}

    if proc.returncode != 0:
        return {"status": "error", "message": proc.stderr.strip()}
    return {"status": "stopped"}
|
|
|
|
|
|
@router.post("/resume")
async def resume_sync(user=Depends(get_current_user)):
    """Start the cloud backup sync service."""
    try:
        proc = subprocess.run(
            ["systemctl", "start", SERVICE_NAME],
            capture_output=True, text=True, timeout=30,
        )
    except Exception as e:
        logger.error(f"Failed to start sync service: {e}")
        return {"status": "error", "message": str(e)}

    if proc.returncode != 0:
        return {"status": "error", "message": proc.stderr.strip()}
    return {"status": "started"}
|
|
|
|
|
|
def _format_progress_summary(progress_line: str) -> str:
|
|
"""Format a [progress] line into a clean summary like ' Checked 108,024 / 118,056 files — 100% — 46.985 KiB/s'."""
|
|
parts = []
|
|
chk_m = re.search(r'chk#(\d+)/(\d+)', progress_line)
|
|
if chk_m:
|
|
parts.append(f"Checked {int(chk_m.group(1)):,} / {int(chk_m.group(2)):,} files")
|
|
xfr_m = re.search(r'xfr#(\d+)/(\d+)', progress_line)
|
|
if xfr_m:
|
|
parts.append(f"Transferred {int(xfr_m.group(1)):,} / {int(xfr_m.group(2)):,} files")
|
|
pct_m = re.search(r'(\d+)%', progress_line)
|
|
if pct_m:
|
|
parts.append(f"{pct_m.group(1)}%")
|
|
speed_m = re.search(r'([\d.]+\s*\S*i?B/s)', progress_line)
|
|
if speed_m:
|
|
parts.append(speed_m.group(1))
|
|
return f" {' — '.join(parts)}" if parts else ""
|
|
|
|
|
|
@router.get("/logs")
async def get_logs(lines: int = 200, user=Depends(get_current_user)):
    """Get last N lines from the cloud backup sync log, cleaned and summarized."""
    if not LOG_FILE.exists():
        return {"logs": "", "lines": 0}

    try:
        # Over-read (10x, capped at 5000) because most raw lines are filtered out below
        result = subprocess.run(
            ["tail", "-n", str(min(lines * 10, 5000)), str(LOG_FILE)],
            capture_output=True, text=True, timeout=10
        )
        # Whitelist-based parser: only emit lines matching known useful patterns.
        # Everything else (daemon internals, tracebacks, config noise) is dropped.
        output = []
        last_progress = None  # most recent [progress] line, summarized when a section ends

        # Regex to strip daemon/universal-logger timestamp prefixes from any line
        _prefix_re = re.compile(
            r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}[.,]\d+\s+'
            r'(?:\[MediaDownloader\.\w+\]\s+)?'
            r'(?:\[(?:INFO|DEBUG|WARNING|ERROR)\]\s+)?'
            r'(?:\[Core\]\s+\[(?:INFO|DEBUG|WARNING|ERROR)\]\s+)?'
        )

        for line in result.stdout.splitlines():
            raw = line.strip()
            if not raw:
                continue

            # Skip universal-logger formatted lines entirely (they duplicate
            # the file_handler lines with a different format)
            if '[MediaDownloader.' in raw:
                continue

            # Strip daemon log prefix to get the clean message
            stripped = _prefix_re.sub('', raw).strip()
            if not stripped:
                continue

            # ── Capture rclone progress lines (emitted by daemon Popen loop) ──
            # Only the latest one per section is kept; summarized on flush.
            if re.match(r'^\s*\[progress\]', stripped):
                last_progress = stripped
                continue

            # ── Section headers: "[2026-03-22 01:13:58] Syncing paid/ (1/11)" ──
            if re.match(r'^\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\]\s+', stripped):
                # Flush pending progress summary before new section
                if last_progress:
                    output.append(_format_progress_summary(last_progress))
                    last_progress = None
                output.append(stripped)
                continue

            # ── Separator lines: "===..." ──
            if re.match(r'^={10,}$', stripped):
                output.append(stripped)
                continue

            # ── File transfer lines: "fname: Copied (new, replaced existing)" ──
            m_xfer = re.match(r'^(.+?):\s*(Copied|Moved|Deleted)\s*(.*)', stripped)
            if m_xfer:
                fname = m_xfer.group(1).strip()
                action = m_xfer.group(2)
                detail = m_xfer.group(3).strip()
                # Show only the basename; drop the uninformative "(new)" detail
                short = fname.split('/')[-1] if '/' in fname else fname
                detail_short = f" ({detail})" if detail and detail != '(new)' else ''
                output.append(f" {action}: {short}{detail_short}")
                continue

            # ── Our cleaned format: " Copied: filename.jpg" ──
            m_clean = re.match(r'^\s*(Copied|Moved|Deleted):\s*(.+)', stripped)
            if m_clean:
                output.append(f" {m_clean.group(1)}: {m_clean.group(2).strip()}")
                continue

            # ── Completion lines from manual log writes: "Completed dir/" ──
            if re.match(r'^Completed\s+\S+/', stripped):
                output.append(stripped)
                continue

            # ── rclone sync errors (deduplicated against the last 3 emitted lines) ──
            if re.match(r'^rclone sync failed', stripped):
                if not any(stripped in prev for prev in output[-3:]):
                    output.append(f" ERROR: {stripped}")
                continue
            if re.match(r'^ERROR:', stripped):
                if not any(stripped in prev for prev in output[-3:]):
                    output.append(stripped)
                continue

            # ── Sync summary lines: "Sync completed — 11 directories" ──
            if re.match(r'^Sync (completed|completed with \d+ error)', stripped):
                output.append(stripped)
                continue

            # ── pg_dump error (deduplicated against the last 5 emitted lines) ──
            if 'pg_dump failed' in stripped:
                if not any('pg_dump failed' in prev for prev in output[-5:]):
                    output.append(f" ERROR: {stripped}")
                continue

            # Everything else is dropped (daemon internals, tracebacks, config noise, etc.)

        # Flush trailing progress for active directory
        if last_progress:
            output.append(_format_progress_summary(last_progress))

        # Return last N lines
        final = output[-lines:] if len(output) > lines else output
        log_content = '\n'.join(final)
        return {"logs": log_content, "lines": len(final)}
    except Exception as e:
        return {"logs": f"Error reading logs: {e}", "lines": 0}
|
|
|
|
|
|
@router.post("/test")
async def test_connection(user=Depends(get_current_user)):
    """Test the rclone connection by listing the remote."""
    cfg = _load_config()
    if not (cfg.get('key_id') and cfg.get('application_key')):
        from fastapi import HTTPException
        raise HTTPException(status_code=400, detail="Cloud backup credentials not configured")

    # Ensure rclone config is up to date before probing
    try:
        _regenerate_rclone_config(cfg)
    except Exception as e:
        return {"status": "error", "message": f"Failed to write rclone config: {e}"}

    # Probe the crypt wrapper when encryption is on, the raw bucket otherwise
    target = (
        f"{RCLONE_CRYPT_NAME}:"
        if cfg.get('encryption_enabled', True)
        else f"{RCLONE_REMOTE_NAME}:{cfg.get('bucket', '')}"
    )

    try:
        proc = subprocess.run(
            ["rclone", "lsd", target, "--config", str(RCLONE_CONF_PATH), "--max-depth", "1"],
            capture_output=True, text=True, timeout=30
        )
    except subprocess.TimeoutExpired:
        return {"status": "error", "message": "Connection timed out (30s)"}
    except Exception as e:
        return {"status": "error", "message": str(e)}

    if proc.returncode != 0:
        return {"status": "error", "message": proc.stderr.strip() or "Connection failed"}

    # Last whitespace-separated token of each lsd line is the directory name
    remote_dirs = [ln.strip().split()[-1] for ln in proc.stdout.strip().splitlines() if ln.strip()]
    return {"status": "success", "message": "Connection successful", "remote_dirs": remote_dirs}
|
|
|
|
|
|
@router.get("/job/{job_id}")
async def get_job(job_id: str, user=Depends(get_current_user)):
    """Get the status of a specific sync job.

    Checks both in-memory tracking and the daemon's status file.
    Daemon status takes priority when the job is actively running there.
    """
    snapshot = _read_status_file()

    if snapshot.get('job_id') == job_id:
        # The daemon owns this job — report its live view.
        if snapshot.get('state') == 'syncing':
            return {
                'id': job_id,
                'status': 'running',
                'started_at': snapshot.get('started_at'),
                'completed_at': None,
                'dirs_synced': snapshot.get('dirs_synced', 0),
                'dirs_total': snapshot.get('dirs_total', 0),
                'current_dir': snapshot.get('current_dir'),
                'current_file': snapshot.get('current_file'),
                'transfer_stats': snapshot.get('transfer_stats'),
                'phase': snapshot.get('phase', 'syncing'),
                'error': None,
            }
        if snapshot.get('completed_at'):
            # Daemon finished this job; progress fields are cleared.
            sync_error = snapshot.get('sync_error')
            return {
                'id': job_id,
                'status': 'completed_with_errors' if sync_error else 'completed',
                'started_at': snapshot.get('started_at'),
                'completed_at': snapshot.get('completed_at'),
                'dirs_synced': snapshot.get('dirs_synced', 0),
                'dirs_total': snapshot.get('dirs_total', 0),
                'current_dir': None,
                'current_file': None,
                'transfer_stats': None,
                'phase': 'completed',
                'error': sync_error,
            }

    # Fall back to in-memory job tracking
    job = _get_sync_job(job_id)
    if not job:
        from fastapi import HTTPException
        raise HTTPException(status_code=404, detail="Job not found")
    return job
|
|
|
|
|
|
@router.get("/available-dirs")
async def get_available_dirs(user=Depends(get_current_user)):
    """List available directories under /opt/immich for backup."""
    if not IMMICH_BASE.is_dir():
        return {"dirs": []}
    # Visible subdirectories only, sorted; hidden (dot-prefixed) entries skipped
    names = [
        entry.name
        for entry in sorted(IMMICH_BASE.iterdir())
        if entry.is_dir() and not entry.name.startswith('.')
    ]
    return {"dirs": names}
|
|
|
|
|
|
# On-disk cache of cloud storage statistics: written by _run_cloud_status_query,
# read by the /cloud-status endpoint and the startup staleness check.
CLOUD_STATUS_CACHE_FILE = Path("/tmp/cloud_storage_status.json")
CLOUD_STATUS_REFRESH_INTERVAL = 6 * 3600  # 6 hours between automatic refreshes

# Guarded by _cloud_status_lock: tracks whether a refresh worker is in flight
# so only one rclone stats query runs at a time.
_cloud_status_query: Dict[str, Any] = {"running": False}
_cloud_status_lock = Lock()
|
|
|
|
|
|
def _trigger_cloud_status_refresh():
    """Kick off a background cloud-status refresh unless one is already in flight."""
    import threading

    # Claim the "running" slot under the lock so only one worker ever starts.
    with _cloud_status_lock:
        already_running = _cloud_status_query["running"]
        if not already_running:
            _cloud_status_query["running"] = True
    if already_running:
        return
    threading.Thread(target=_run_cloud_status_query, daemon=True).start()
|
|
|
|
|
|
def _cloud_status_periodic_loop():
    """Periodic loop that refreshes cloud status every 6 hours.

    On startup, checks if the cached data is stale (older than the refresh
    interval) and triggers an immediate refresh if so. This prevents restarts
    from resetting the 6-hour countdown indefinitely.
    """
    import time as _time

    # Startup staleness check: refresh right away if the cache is missing or old.
    try:
        _time.sleep(30)  # brief delay to let API fully start
        needs_refresh = True
        if CLOUD_STATUS_CACHE_FILE.exists():
            snapshot = json.loads(CLOUD_STATUS_CACHE_FILE.read_text())
            ts = snapshot.get('queried_at')
            if ts:
                from datetime import datetime as _dt
                elapsed = (_dt.now() - _dt.fromisoformat(ts)).total_seconds()
                needs_refresh = elapsed > CLOUD_STATUS_REFRESH_INTERVAL
        if needs_refresh:
            cfg = _load_config()
            if cfg.get('key_id') and cfg.get('bucket'):
                logger.info("Cloud status cache is stale on startup, refreshing...")
                _trigger_cloud_status_refresh()
    except Exception as e:
        logger.warning(f"Startup cloud status staleness check error: {e}")

    # Steady state: refresh every interval, but only while backup is configured.
    while True:
        _time.sleep(CLOUD_STATUS_REFRESH_INTERVAL)
        try:
            cfg = _load_config()
            if cfg.get('key_id') and cfg.get('bucket'):
                logger.info("Periodic cloud status refresh starting...")
                _trigger_cloud_status_refresh()
        except Exception as e:
            logger.warning(f"Periodic cloud status refresh error: {e}")
|
|
|
|
|
|
# Start the periodic refresh thread at import time. daemon=True so it never
# blocks interpreter shutdown; the loop itself waits 30s before first work.
import threading as _threading
_cloud_status_timer = _threading.Thread(target=_cloud_status_periodic_loop, daemon=True)
_cloud_status_timer.start()
|
|
|
|
|
|
def _run_cloud_status_query():
    """Background worker: query rclone for cloud storage stats and cache to disk.

    Writes an initial "running" snapshot so the frontend can show progress,
    lists top-level directories on the remote, sizes each concurrently, then
    persists the final result to CLOUD_STATUS_CACHE_FILE.

    Fix: the "running" flag is now cleared in an outer ``finally`` and the
    final cache write is guarded — previously an OSError from the ``finally``
    write_text escaped before the flag reset, leaving ``running`` stuck at
    True and permanently blocking all future refreshes.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    config = _load_config()
    bucket = config.get('bucket', '')
    encryption_enabled = config.get('encryption_enabled', True)
    provider = config.get('provider', 'b2')
    endpoint = config.get('endpoint', '')

    # Crypt remote wraps the whole bucket; plain remote needs the bucket path.
    if encryption_enabled:
        remote = f"{RCLONE_CRYPT_NAME}:"
    else:
        remote = f"{RCLONE_REMOTE_NAME}:{bucket}"

    result = {
        "provider": provider,
        "bucket": bucket,
        "endpoint": endpoint,
        "encryption": encryption_enabled,
        "total_files": None,
        "total_size": None,
        "total_size_human": None,
        "directories": [],
        "error": None,
        "queried_at": datetime.now().isoformat(),
        "query_status": "running",
    }

    def _rclone_size(target: str, timeout: int = 120) -> dict:
        # Run `rclone size --json` on one remote path; returns parsed stats
        # ({"count": ..., "bytes": ...}) or an {"error": ...} marker.
        r = subprocess.run(
            ["rclone", "size", target, "--config", str(RCLONE_CONF_PATH), "--json"],
            capture_output=True, text=True, timeout=timeout
        )
        if r.returncode == 0:
            return json.loads(r.stdout)
        return {"error": r.stderr.strip()[:100]}

    def _get_dir_size(d: str) -> dict:
        # Size one top-level directory; failures degrade to per-dir error entries
        # instead of aborting the whole query.
        dir_remote = f"{remote}{d}" if remote.endswith(":") else f"{remote}/{d}"
        try:
            dd = _rclone_size(dir_remote, timeout=90)
            if "error" in dd:
                return {"name": d, "files": 0, "size": 0, "size_human": "error", "error": dd["error"]}
            return {
                "name": d,
                "files": dd.get("count", 0),
                "size": dd.get("bytes", 0),
                "size_human": _format_bytes(dd.get("bytes", 0)),
            }
        except subprocess.TimeoutExpired:
            return {"name": d, "files": 0, "size": 0, "size_human": "timeout"}
        except Exception as e:
            return {"name": d, "files": 0, "size": 0, "size_human": "error", "error": str(e)[:100]}

    try:
        try:
            # Save initial "running" state so frontend can show progress
            CLOUD_STATUS_CACHE_FILE.write_text(json.dumps(result))

            lsd_result = subprocess.run(
                ["rclone", "lsd", remote, "--config", str(RCLONE_CONF_PATH), "--max-depth", "1"],
                capture_output=True, text=True, timeout=30
            )
            if lsd_result.returncode != 0:
                result["error"] = lsd_result.stderr.strip()[:200]
                result["query_status"] = "error"
                return  # inner finally below still persists the error state

            # Last whitespace-separated token of each lsd line is the dir name
            dir_names = []
            for line in lsd_result.stdout.strip().splitlines():
                parts = line.strip().split()
                if parts:
                    dir_names.append(parts[-1])

            # Size directories concurrently (bounded fan-out)
            with ThreadPoolExecutor(max_workers=6) as executor:
                dir_futures = {executor.submit(_get_dir_size, d): d for d in sorted(dir_names)}
                dir_infos = []
                for future in as_completed(dir_futures, timeout=300):
                    dir_infos.append(future.result())

            # Compute totals from per-directory results (avoids slow rclone size on entire remote)
            result["total_files"] = sum(d.get("files", 0) for d in dir_infos)
            result["total_size"] = sum(d.get("size", 0) for d in dir_infos)
            result["total_size_human"] = _format_bytes(result["total_size"])
            result["directories"] = dir_infos
            result["query_status"] = "complete"

        except Exception as e:
            result["error"] = str(e)[:200]
            result["query_status"] = "error"
        finally:
            # Persist whatever state we reached; a failed write must not
            # propagate past the outer finally's flag reset.
            result["queried_at"] = datetime.now().isoformat()
            try:
                CLOUD_STATUS_CACHE_FILE.write_text(json.dumps(result))
            except OSError as e:
                logger.warning(f"Failed to write cloud status cache: {e}")
    finally:
        # Always release the in-memory "running" slot so refreshes can recur.
        with _cloud_status_lock:
            _cloud_status_query["running"] = False
|
|
|
|
|
|
@router.get("/cloud-status")
async def get_cloud_status(user=Depends(get_current_user)):
    """Return cached cloud storage stats. Returns instantly."""
    config = _load_config()
    if not (config.get('key_id') and config.get('bucket')):
        from fastapi import HTTPException
        raise HTTPException(status_code=400, detail="Cloud backup not configured")

    # Load whatever the background worker last wrote; tolerate a missing
    # or half-written cache file.
    snapshot = {}
    try:
        if CLOUD_STATUS_CACHE_FILE.exists():
            snapshot = json.loads(CLOUD_STATUS_CACHE_FILE.read_text())
    except (json.JSONDecodeError, OSError):
        snapshot = {}

    with _cloud_status_lock:
        running = _cloud_status_query["running"]

    if snapshot:
        snapshot["query_running"] = running
        return snapshot

    # No cache yet — return an empty placeholder describing the configured remote.
    return {
        "provider": config.get('provider', 'b2'),
        "bucket": config.get('bucket', ''),
        "endpoint": config.get('endpoint', ''),
        "encryption": config.get('encryption_enabled', True),
        "total_files": None,
        "total_size": None,
        "total_size_human": None,
        "directories": [],
        "error": None,
        "queried_at": None,
        "query_status": "never",
        "query_running": running,
    }
|
|
|
|
|
|
@router.post("/cloud-status/refresh")
async def refresh_cloud_status(background_tasks: BackgroundTasks, user=Depends(get_current_user)):
    """Trigger a background refresh of cloud storage stats."""
    cfg = _load_config()
    if not (cfg.get('key_id') and cfg.get('bucket')):
        from fastapi import HTTPException
        raise HTTPException(status_code=400, detail="Cloud backup not configured")

    # Claim the "running" slot under the lock; bail out if a worker holds it.
    with _cloud_status_lock:
        if _cloud_status_query["running"]:
            return {"status": "already_running"}
        _cloud_status_query["running"] = True

    background_tasks.add_task(_run_cloud_status_query)
    return {"status": "started"}
|
|
|
|
|
|
def _format_bytes(b: int) -> str:
|
|
"""Format bytes into human-readable string."""
|
|
if b is None or b == 0:
|
|
return "0 B"
|
|
for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB']:
|
|
if abs(b) < 1024.0:
|
|
return f"{b:.1f} {unit}" if unit != 'B' else f"{b} B"
|
|
b /= 1024.0
|
|
return f"{b:.1f} EB"
|