Files
media-downloader/web/backend/routers/cloud_backup.py
Todd 523f91788e Fix DB paths, add auth to sensitive endpoints, misc bug fixes
- scheduler.py: Use full path for scheduler_state.db instead of relative name
- recycle.py: Use full path for thumbnails.db instead of relative name
- cloud_backup.py, maintenance.py, stats.py: Require admin for config/cleanup/settings endpoints
- press.py: Add auth to press image serving endpoint
- private_gallery.py: Fix _create_pg_job call and add missing secrets import
- appearances.py: Use sync httpx instead of asyncio.run for background thread HTTP call

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 08:25:00 -04:00

1526 lines
59 KiB
Python

"""
Cloud Backup Router
Manages cloud backup configuration (rclone-based sync to B2/S3/etc.):
- Config CRUD with sensitive field masking
- rclone.conf generation (preserves existing sections)
- Sync control (trigger, pause, resume)
- Status and log retrieval
- Connection testing
"""
import json
import os
import re
import subprocess
import tempfile
import uuid
from datetime import datetime
from pathlib import Path
from threading import Lock
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, BackgroundTasks, Depends
from pydantic import BaseModel, Field
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from modules.universal_logger import get_logger
logger = get_logger('CloudBackup')
router = APIRouter(prefix="/api/cloud-backup", tags=["Cloud Backup"])
limiter = Limiter(key_func=get_remote_address)
# ============================================================================
# CONSTANTS
# ============================================================================
RCLONE_CONF_PATH = Path("/root/.config/rclone/rclone.conf")
RCLONE_REMOTE_NAME = "cloud-backup-remote"   # rclone.conf section managed by this module
RCLONE_CRYPT_NAME = "cloud-backup-crypt"     # crypt wrapper section managed by this module
STATUS_FILE = Path("/tmp/cloud_backup_status.json")    # written by the sync daemon
TRIGGER_FILE = Path("/tmp/cloud_backup_trigger.json")  # written by the API to request a sync
LOG_FILE = Path("/opt/media-downloader/logs/cloud_backup.log")
IMMICH_BASE = Path("/opt/immich")            # root of the media tree being backed up
SETTINGS_KEY = "cloud_backup"                # key under which config lives in the settings DB
SERVICE_NAME = "cloud-backup-sync.service"   # systemd unit running the sync daemon
# Config fields whose values are masked in API responses (see _mask_config).
SENSITIVE_FIELDS = {"key_id", "application_key", "encryption_password", "encryption_salt"}
MASK_VALUE = "****"
DEFAULT_INCLUDE_DIRS = ["paid", "private", "md", "el", "elv", "ela", "upload", "review", "recycle", "db_dumps", "app_backup"]
DEFAULT_EXCLUDE_DIRS = ["lost+found", "db.old"]
# ============================================================================
# JOB TRACKING FOR BACKGROUND SYNC
# ============================================================================
# In-memory registry of sync jobs keyed by job id; every access goes
# through _jobs_lock (see the helpers below).
_sync_jobs: Dict[str, Dict] = {}
_jobs_lock = Lock()
def _get_sync_job(job_id: str) -> Optional[Dict]:
    """Return the tracked sync job for *job_id*, or None if unknown."""
    with _jobs_lock:
        job = _sync_jobs.get(job_id)
    return job
def _update_sync_job(job_id: str, updates: Dict):
    """Merge *updates* into an existing job record; no-op for unknown ids."""
    with _jobs_lock:
        job = _sync_jobs.get(job_id)
        if job is not None:
            job.update(updates)
def _create_sync_job(job_id: str) -> Dict:
    """Register a fresh 'running' job record under *job_id* and return it."""
    record = {
        'id': job_id,
        'status': 'running',
        'started_at': datetime.now().isoformat(),
        'completed_at': None,
        'dirs_synced': 0,
        'dirs_total': 0,
        'current_dir': None,
        'current_file': None,
        'transfer_stats': None,
        'phase': 'preparing',
        'error': None,
    }
    with _jobs_lock:
        _sync_jobs[job_id] = record
        return _sync_jobs[job_id]
def _cleanup_old_sync_jobs():
    """Drop completed job records older than one hour from the registry."""
    max_age_seconds = 3600
    with _jobs_lock:
        now = datetime.now()
        stale = []
        for jid, record in _sync_jobs.items():
            finished = record.get('completed_at')
            if not finished:
                continue  # still running — never evict
            try:
                age = (now - datetime.fromisoformat(finished)).total_seconds()
            except (ValueError, TypeError):
                continue  # unparseable timestamp — keep the record
            if age > max_age_seconds:
                stale.append(jid)
        for jid in stale:
            del _sync_jobs[jid]
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class CloudBackupConfigModel(BaseModel):
    """Full cloud backup configuration as stored in the settings DB."""
    enabled: bool = False
    provider: str = "b2"  # 'b2' gets a native b2 remote; anything else → S3-compatible
    endpoint: str = ""
    bucket: str = ""
    key_id: str = ""
    application_key: str = ""
    encryption_enabled: bool = True
    encryption_password: str = ""
    encryption_salt: str = ""
    include_dirs: List[str] = Field(default_factory=lambda: list(DEFAULT_INCLUDE_DIRS))
    exclude_dirs: List[str] = Field(default_factory=lambda: list(DEFAULT_EXCLUDE_DIRS))
    cooldown_seconds: int = 300  # presumably the daemon's debounce between syncs — confirm in daemon
    bandwidth_limit: Optional[str] = None  # passed to rclone --bwlimit when set
class CloudBackupConfigUpdate(BaseModel):
    """Partial config update; None means 'leave unchanged'.

    Sensitive fields submitted as the mask value ("****") also keep their
    stored values (see _merge_config_update).
    """
    enabled: Optional[bool] = None
    provider: Optional[str] = None
    endpoint: Optional[str] = None
    bucket: Optional[str] = None
    key_id: Optional[str] = None
    application_key: Optional[str] = None
    encryption_enabled: Optional[bool] = None
    encryption_password: Optional[str] = None
    encryption_salt: Optional[str] = None
    include_dirs: Optional[List[str]] = None
    exclude_dirs: Optional[List[str]] = None
    cooldown_seconds: Optional[int] = None
    bandwidth_limit: Optional[str] = None
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _get_settings_manager():
    """Return the SettingsManager attached to the global AppState."""
    return get_app_state().settings
def _load_config() -> dict:
    """Load the cloud backup config dict, falling back to model defaults."""
    stored = _get_settings_manager().get(SETTINGS_KEY)
    if isinstance(stored, dict) and stored:
        return stored
    # Nothing stored (or wrong shape) — use the pydantic defaults.
    return CloudBackupConfigModel().model_dump()
def _save_config(config: dict):
    """Persist the cloud backup config dict to the settings DB."""
    manager = _get_settings_manager()
    manager.set(
        SETTINGS_KEY,
        config,
        category='cloud_backup',
        description='Cloud backup configuration',
    )
def _mask_config(config: dict) -> dict:
    """Return a copy of *config* with sensitive values hidden.

    Empty/missing sensitive values become None so the frontend can tell
    'unset' apart from 'set but hidden' (the mask value).
    """
    masked = dict(config)
    for name in SENSITIVE_FIELDS:
        current = masked.get(name)
        if not current:
            masked[name] = None
        elif current != MASK_VALUE:
            masked[name] = MASK_VALUE
    return masked
def _merge_config_update(existing: dict, update: dict) -> dict:
    """Overlay *update* onto *existing* and return the merged dict.

    None values are skipped, as are sensitive fields submitted with the
    mask value ("****") — both mean 'keep what is already stored'.
    """
    merged = dict(existing)
    for key, value in update.items():
        if value is None:
            continue
        if value == MASK_VALUE and key in SENSITIVE_FIELDS:
            continue
        merged[key] = value
    return merged
def _rclone_obscure(password: str) -> str:
    """Run ``rclone obscure`` on *password*; empty input yields "".

    Raises:
        RuntimeError: if the rclone invocation exits non-zero.
    """
    if not password:
        return ""
    proc = subprocess.run(
        ["rclone", "obscure", password],
        capture_output=True,
        text=True,
        timeout=10,
    )
    if proc.returncode != 0:
        raise RuntimeError(f"rclone obscure failed: {proc.stderr.strip()}")
    return proc.stdout.strip()
def _regenerate_rclone_config(config: dict):
    """
    Regenerate rclone.conf preserving all existing sections
    except cloud-backup-remote and cloud-backup-crypt.

    Credentials are written in plain text with 0600 permissions; the crypt
    passwords are first run through `rclone obscure`. The file is replaced
    atomically via a temp file + rename in the same directory.
    """
    RCLONE_CONF_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Read existing config
    existing_content = ""
    if RCLONE_CONF_PATH.exists():
        existing_content = RCLONE_CONF_PATH.read_text()
    # Parse existing sections (preserve all except our managed ones)
    sections = {}
    current_section = None
    current_lines = []
    for line in existing_content.splitlines():
        section_match = re.match(r'^\[(.+)\]$', line.strip())
        if section_match:
            # Close out the previous section before starting the new one.
            if current_section is not None:
                sections[current_section] = '\n'.join(current_lines)
            current_section = section_match.group(1)
            current_lines = [line]
        elif current_section is not None:
            current_lines.append(line)
        # Lines before any section are ignored (shouldn't exist in rclone.conf)
    if current_section is not None:
        sections[current_section] = '\n'.join(current_lines)
    # Remove our managed sections
    sections.pop(RCLONE_REMOTE_NAME, None)
    sections.pop(RCLONE_CRYPT_NAME, None)
    # Build new sections
    provider = config.get('provider', 'b2')
    new_sections = []
    if provider == 'b2':
        new_sections.append(f"""[{RCLONE_REMOTE_NAME}]
type = b2
account = {config.get('key_id', '')}
key = {config.get('application_key', '')}
endpoint = {config.get('endpoint', '')}""")
    else:
        # S3-compatible fallback
        new_sections.append(f"""[{RCLONE_REMOTE_NAME}]
type = s3
provider = Other
access_key_id = {config.get('key_id', '')}
secret_access_key = {config.get('application_key', '')}
endpoint = {config.get('endpoint', '')}""")
    if config.get('encryption_enabled', True):
        # Crypt remote wraps the base remote; bucket is baked into its path.
        enc_pass = _rclone_obscure(config.get('encryption_password', ''))
        enc_salt = _rclone_obscure(config.get('encryption_salt', ''))
        bucket = config.get('bucket', '')
        new_sections.append(f"""[{RCLONE_CRYPT_NAME}]
type = crypt
remote = {RCLONE_REMOTE_NAME}:{bucket}
password = {enc_pass}
password2 = {enc_salt}
filename_encryption = standard
directory_name_encryption = true""")
    # Write config atomically
    output_parts = []
    for section_name, section_content in sections.items():
        output_parts.append(section_content)
    for section in new_sections:
        output_parts.append(section)
    final_content = '\n\n'.join(output_parts) + '\n'
    # mkstemp in the same directory so os.rename stays on one filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=str(RCLONE_CONF_PATH.parent), suffix='.tmp')
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(final_content)
        os.chmod(tmp_path, 0o600)
        os.rename(tmp_path, str(RCLONE_CONF_PATH))
        logger.info("rclone.conf regenerated successfully")
    except Exception:
        # Best-effort cleanup of the temp file; re-raise the original error.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
def _read_status_file() -> dict:
    """Return the daemon's status JSON, or {} if missing/unreadable."""
    try:
        if not STATUS_FILE.exists():
            return {}
        return json.loads(STATUS_FILE.read_text())
    except (json.JSONDecodeError, OSError) as e:
        logger.warning(f"Failed to read status file: {e}")
        return {}
def _get_service_status() -> str:
    """Return systemd state of the sync service ('active', 'inactive', ...)."""
    try:
        proc = subprocess.run(
            ["systemctl", "is-active", SERVICE_NAME],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        return "unknown"
    return proc.stdout.strip()
def _find_ssd_mount() -> Optional[Path]:
    """Auto-detect the SSD underlying a mergerfs mount at IMMICH_BASE.
    Reads /proc/mounts to find the mergerfs entry, resolves drive names
    to mount paths, then checks each drive's rotational flag (ROTA=0 = SSD).

    Returns the mount path of the first non-rotational member drive found,
    or None when nothing matches (or on any error).
    """
    try:
        # Build a map of all mount points: mountpoint -> (device, ...)
        mounts = {}
        with open('/proc/mounts') as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 2 and parts[0].startswith('/dev/'):
                    mounts[parts[1]] = parts[0]
        with open('/proc/mounts') as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 3 and parts[1] == str(IMMICH_BASE) and 'mergerfs' in parts[2]:
                    # Source can be "samsung2tb:onetouch4tb" or "/mnt/a:/mnt/b"
                    drive_names = parts[0].split(':')
                    for name in drive_names:
                        name = name.strip()
                        if not name:
                            continue
                        # Try as-is first (absolute path), then /mnt/<name>
                        candidates = [name, f'/mnt/{name}']
                        for drive_path in candidates:
                            if drive_path in mounts:
                                dev = mounts[drive_path]
                                # Strip trailing partition digits to get the base
                                # device (/dev/sda1 -> sda). NOTE(review): this
                                # mangles NVMe names like nvme0n1p1 — confirm the
                                # member drives here are /dev/sdX style.
                                dev_base = dev.rstrip('0123456789')
                                dev_name = dev_base.split('/')[-1]
                                rotational = Path(f'/sys/block/{dev_name}/queue/rotational')
                                if rotational.exists() and rotational.read_text().strip() == '0':
                                    return Path(drive_path)
    except Exception:
        pass
    return None
def _get_fast_storage_base() -> Path:
    """Get the fastest storage path for dumps.

    Returns the SSD mount under mergerfs if detected, otherwise falls back
    to the mergerfs path itself. Dirs created here are visible via mergerfs
    without any cross-filesystem copies.
    """
    # Path objects are always truthy, so `or` falls through only on None.
    return _find_ssd_mount() or IMMICH_BASE
# Resolved once at import time: SSD-backed base for DB dumps / app backups.
_FAST_BASE = _get_fast_storage_base()
DB_DUMPS_DIR = _FAST_BASE / "db_dumps"
APP_BACKUP_DIR = _FAST_BASE / "app_backup"
# Systemd service files to back up
SYSTEMD_FILES = [
    "media-downloader.service",
    "media-downloader-api.service",
    "media-downloader-frontend.service",
    "media-downloader-db-cleanup.service",
    "media-downloader-db-cleanup.timer",
    "xvfb-media-downloader.service",
    "cloud-backup-sync.service",
]
def _run_pre_sync_dumps(job_id: str = None, include_immich_db: bool = True, include_app_archive: bool = True):
    """Dump databases and archive app + system configs before syncing.
    Args:
        job_id: Optional in-memory sync job to receive progress messages.
        include_immich_db: If False, skip Immich DB dump (only needed daily).
        include_app_archive: If False, skip app tar.gz + system configs (only needed daily).
    Returns:
        List of error strings (empty on full success); failures are
        collected, never raised.
    """
    DB_DUMPS_DIR.mkdir(parents=True, exist_ok=True)
    APP_BACKUP_DIR.mkdir(parents=True, exist_ok=True)
    errors = []
    import shutil
    import time as _time

    def _status(msg):
        # Surface progress text to the in-memory job tracker (frontend polls it).
        if job_id:
            _update_sync_job(job_id, {'current_dir': msg})

    def _get_dir_size(path):
        """Get total size of a directory in bytes."""
        try:
            return sum(f.stat().st_size for f in Path(path).rglob('*') if f.is_file())
        except OSError:
            return 0

    def _get_docker_dir_size(container, path):
        """Get size of a directory inside a docker container."""
        try:
            r = subprocess.run(
                ["docker", "exec", container, "du", "-sb", path],
                capture_output=True, text=True, timeout=5
            )
            if r.returncode == 0:
                return int(r.stdout.split()[0])
        except Exception:
            pass
        return 0

    def _run_with_progress(cmd, label, total_bytes, size_fn, env=None):
        """Run a subprocess while polling output size for progress updates."""
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
        while proc.poll() is None:
            _time.sleep(3)
            current = size_fn()
            if total_bytes > 0:
                # Cap at 99% — completion is only reported once the process exits.
                pct = min(99, int(current / total_bytes * 100))
                _status(f"{label} ({pct}%)")
        stdout, stderr = proc.communicate()
        return proc.returncode, stderr.decode() if stderr else ""
    # ── 1. Database dumps ──────────────────────────────────────────────
    # Immich PostgreSQL (Docker container) — only during daily/full backup
    if include_immich_db:
        try:
            _status("Dumping Immich database (0%)...")
            logger.info("Dumping Immich database...")
            # Get DB size for progress tracking
            sz_result = subprocess.run(
                ["docker", "exec", "immich_postgres",
                 "psql", "-U", "postgres", "-d", "immich", "-tAc",
                 "SELECT pg_database_size('immich')"],
                capture_output=True, text=True, timeout=10
            )
            immich_db_size = int(sz_result.stdout.strip()) if sz_result.returncode == 0 else 0
            # Clean previous dump dir
            subprocess.run(
                ["docker", "exec", "immich_postgres", "rm", "-rf", "/tmp/immich_dump"],
                capture_output=True, text=True, timeout=10
            )
            # Parallel directory-format dump; ML search tables excluded (rebuildable).
            rc, stderr = _run_with_progress(
                ["docker", "exec", "immich_postgres",
                 "pg_dump", "-U", "postgres", "-d", "immich",
                 "--no-owner", "--no-acl", "-Fd", "-j", "4",
                 "--exclude-table-data=face_search",
                 "--exclude-table-data=smart_search",
                 "-f", "/tmp/immich_dump"],
                "Dumping Immich DB",
                immich_db_size,
                lambda: _get_docker_dir_size("immich_postgres", "/tmp/immich_dump"),
            )
            if rc == 0:
                _status("Copying Immich dump...")
                subprocess.run(
                    ["docker", "exec", "immich_postgres",
                     "tar", "cf", "/tmp/immich_dump.tar", "-C", "/tmp", "immich_dump"],
                    capture_output=True, text=True, timeout=120
                )
                # docker cp directly to SSD-backed DB_DUMPS_DIR (no cross-fs move needed)
                final_dest = str(DB_DUMPS_DIR / "immich_dump.tar")
                subprocess.run(
                    ["docker", "cp", "immich_postgres:/tmp/immich_dump.tar", final_dest],
                    capture_output=True, text=True, timeout=120
                )
                subprocess.run(
                    ["docker", "exec", "immich_postgres",
                     "sh", "-c", "rm -rf /tmp/immich_dump /tmp/immich_dump.tar"],
                    capture_output=True, text=True, timeout=10
                )
                size_mb = Path(final_dest).stat().st_size / 1e6
                logger.info(f"Immich DB dump: {size_mb:.1f} MB")
            else:
                errors.append(f"Immich pg_dump failed: {stderr[:200]}")
                logger.error(f"Immich pg_dump failed: {stderr[:200]}")
        except Exception as e:
            errors.append(f"Immich pg_dump error: {e}")
            logger.error(f"Immich pg_dump error: {e}")
    # Media Downloader PostgreSQL (local) — parallel directory-format dump
    # Uses --lock-wait-timeout to avoid hanging when scheduler holds locks,
    # with a serial (-j 1) retry if parallel dump fails on lock contention.
    try:
        _status("Dumping Media Downloader DB (0%)...")
        logger.info("Dumping Media Downloader database...")
        # Get DB size
        env = os.environ.copy()
        # NOTE(review): hardcoded DB password — should be loaded from config/env;
        # it is also shipped inside the app archive below.
        env["PGPASSWORD"] = "PNsihOXvvuPwWiIvGlsc9Fh2YmMmB"
        sz_result = subprocess.run(
            ["psql", "-h", "localhost", "-U", "media_downloader",
             "-d", "media_downloader", "-tAc",
             "SELECT pg_database_size('media_downloader')"],
            capture_output=True, text=True, timeout=10, env=env
        )
        md_db_size = int(sz_result.stdout.strip()) if sz_result.returncode == 0 else 0
        # Dump directly to destination (cross-fs move is slower than direct write)
        final_dump_dir = str(DB_DUMPS_DIR / "media_downloader_dump")
        if Path(final_dump_dir).exists():
            shutil.rmtree(final_dump_dir)
        base_cmd = [
            "pg_dump", "-h", "localhost", "-U", "media_downloader",
            "-d", "media_downloader", "--no-owner", "--no-acl",
            "--exclude-table-data=thumbnails",
            "--lock-wait-timeout=30000",
        ]
        rc, stderr = _run_with_progress(
            base_cmd + ["-Fd", "-j", "4", "-f", final_dump_dir],
            "Dumping Media Downloader DB",
            md_db_size,
            lambda: _get_dir_size(final_dump_dir),
            env=env,
        )
        if rc != 0 and "could not obtain lock" in stderr:
            # Lock contention — retry serial after a short wait
            logger.warning("Parallel pg_dump hit lock contention, retrying serial in 15s...")
            _status("DB lock contention, retrying...")
            if Path(final_dump_dir).exists():
                shutil.rmtree(final_dump_dir)
            import time as _time2
            _time2.sleep(15)
            rc, stderr = _run_with_progress(
                base_cmd + ["-Fd", "-j", "1", "-f", final_dump_dir],
                "Dumping Media Downloader DB (retry)",
                md_db_size,
                lambda: _get_dir_size(final_dump_dir),
                env=env,
            )
        if rc == 0:
            total = _get_dir_size(final_dump_dir)
            logger.info(f"Media Downloader DB dump: {total / 1e6:.1f} MB")
        else:
            errors.append(f"Media Downloader pg_dump failed: {stderr[:200]}")
            logger.error(f"Media Downloader pg_dump failed: {stderr[:200]}")
    except Exception as e:
        errors.append(f"Media Downloader pg_dump error: {e}")
        logger.error(f"Media Downloader pg_dump error: {e}")
    # ── 2-6: App archive + system configs (daily only) ─────────────────
    if include_app_archive:
        try:
            _status("Archiving media-downloader app...")
            logger.info("Archiving media-downloader app...")
            # Write directly to SSD-backed APP_BACKUP_DIR (no cross-fs move)
            final_archive = APP_BACKUP_DIR / "media-downloader-app.tar.gz"
            result = subprocess.run(
                ["tar", "czf", str(final_archive),
                 "--exclude=./venv",
                 "--exclude=./web/frontend/node_modules",
                 "--exclude=./logs",
                 "--exclude=./cache/thumbnails",
                 "--exclude=./__pycache__",
                 "--exclude=./.git",
                 "--exclude=./temp",
                 "-C", "/opt", "media-downloader"],
                capture_output=True, text=True, timeout=600
            )
            if result.returncode == 0:
                size_mb = final_archive.stat().st_size / 1e6
                logger.info(f"App archive: {size_mb:.1f} MB")
            else:
                errors.append(f"App archive failed: {result.stderr[:200]}")
                logger.error(f"App archive failed: {result.stderr[:200]}")
        except Exception as e:
            errors.append(f"App archive error: {e}")
            logger.error(f"App archive error: {e}")
        try:
            # Copy systemd unit files so services can be re-installed on restore.
            services_dir = APP_BACKUP_DIR / "systemd"
            services_dir.mkdir(parents=True, exist_ok=True)
            copied = 0
            for svc in SYSTEMD_FILES:
                src = Path(f"/etc/systemd/system/{svc}")
                if src.exists():
                    (services_dir / svc).write_text(src.read_text())
                    copied += 1
            logger.info(f"Copied {copied} systemd service files")
        except Exception as e:
            errors.append(f"Systemd backup error: {e}")
            logger.error(f"Systemd backup error: {e}")
        try:
            if RCLONE_CONF_PATH.exists():
                (APP_BACKUP_DIR / "rclone.conf").write_text(RCLONE_CONF_PATH.read_text())
                logger.info("Copied rclone.conf")
        except Exception as e:
            errors.append(f"rclone config backup error: {e}")
            logger.error(f"rclone config backup error: {e}")
        try:
            compose_src = IMMICH_BASE / "docker-compose.yml"
            if compose_src.exists():
                (APP_BACKUP_DIR / "immich-docker-compose.yml").write_text(compose_src.read_text())
            env_src = IMMICH_BASE / ".env"
            if env_src.exists():
                (APP_BACKUP_DIR / "immich-env").write_text(env_src.read_text())
            logger.info("Copied Immich docker-compose + .env")
        except Exception as e:
            errors.append(f"Immich compose backup error: {e}")
            logger.error(f"Immich compose backup error: {e}")
        try:
            restore_src = Path("/opt/media-downloader/scripts/cloud_backup_restore.sh")
            if restore_src.exists():
                (APP_BACKUP_DIR / "RESTORE.sh").write_text(restore_src.read_text())
                logger.info("Copied restore script as RESTORE.sh")
        except Exception as e:
            errors.append(f"Restore script backup error: {e}")
            logger.error(f"Restore script backup error: {e}")
    return errors
def _run_full_backup(config: dict, job_id: str):
    """Run Media Downloader DB dump + rclone sync.
    Immich DB + app archive are daily-only (handled by daemon at 3 AM).
    """
    pre_errors = _run_pre_sync_dumps(
        job_id=job_id,
        include_immich_db=False,
        include_app_archive=False,
    )
    _run_file_sync(config, job_id, extra_errors=pre_errors)
def _parse_rclone_stats(line: str) -> Optional[Dict]:
"""Parse an rclone --stats-one-line output line into structured data.
Actual rclone format:
INFO : 1.424 GiB / 1.424 GiB, 100%, 0 B/s, ETA -
INFO : 1.424 GiB / 1.424 GiB, 100%, 10.5 MiB/s, ETA 7m30s, Checks: 100, Transferred: 15 / 45
"""
stats = {}
try:
# Extract: bytes_done / bytes_total, pct%, speed
m = re.search(r'([\d.]+\s*\S*?B)\s*/\s*([\d.]+\s*\S*?B),\s*(\d+)%,\s*([\d.]+\s*\S*?/s)', line)
if m:
stats['bytes_done'] = m.group(1).strip()
stats['bytes_total'] = m.group(2).strip()
stats['pct'] = int(m.group(3))
stats['speed'] = m.group(4).strip()
# Extract ETA
m_eta = re.search(r'ETA\s+([\w.]+)', line)
if m_eta and m_eta.group(1) != '-':
stats['eta'] = m_eta.group(1)
# Extract file count: "Transferred: 15 / 45" at end of one-line stats
m_files = re.search(r'Transferred:\s*(\d+)\s*/\s*(\d+)', line)
if m_files:
stats['files_done'] = int(m_files.group(1))
stats['files_total'] = int(m_files.group(2))
# Extract checks count — "Checks: 100" or "(chk#573/582)"
m_checks = re.search(r'Checks:\s*(\d+)', line)
if m_checks:
stats['checks_done'] = int(m_checks.group(1))
m_chk = re.search(r'chk#(\d+)/(\d+)', line)
if m_chk:
stats['checks_done'] = int(m_chk.group(1))
stats['checks_total'] = int(m_chk.group(2))
except (ValueError, AttributeError):
pass
return stats if stats else None
def _run_file_sync(config: dict, job_id: str, extra_errors: list = None):
    """Run rclone sync for all included directories with live transfer stats.

    Args:
        config: Cloud backup config dict (include_dirs, encryption_enabled,
            bucket, bandwidth_limit).
        job_id: In-memory job id that receives per-directory progress.
        extra_errors: Errors from the pre-sync dump phase, carried into the
            final job status.

    Side effects: appends to LOG_FILE, updates the job tracker, writes the
    manual-sync status file and triggers a cloud storage stats refresh.
    """
    include_dirs = config.get('include_dirs', DEFAULT_INCLUDE_DIRS)
    encryption_enabled = config.get('encryption_enabled', True)
    bandwidth_limit = config.get('bandwidth_limit')
    bucket = config.get('bucket', '')
    # Only sync directories that actually exist under the media root.
    dirs_to_sync = [d for d in include_dirs if (IMMICH_BASE / d).is_dir()]
    _update_sync_job(job_id, {'dirs_total': len(dirs_to_sync), 'transfer_stats': None})
    errors = list(extra_errors) if extra_errors else []
    for i, dir_name in enumerate(dirs_to_sync):
        _update_sync_job(job_id, {
            'current_dir': dir_name, 'dirs_synced': i,
            'transfer_stats': None, 'current_file': None,
            'phase': 'checking',
        })
        src = str(IMMICH_BASE / dir_name)
        # Encrypted syncs go through the crypt remote (bucket is baked into it).
        if encryption_enabled:
            dest = f"{RCLONE_CRYPT_NAME}:{dir_name}"
        else:
            dest = f"{RCLONE_REMOTE_NAME}:{bucket}/{dir_name}"
        cmd = [
            "rclone", "sync", src, dest,
            "--config", str(RCLONE_CONF_PATH),
            "--stats", "3s",
            "--stats-one-line",
            "-v",
            "--transfers", "4",
            "--checkers", "16",
            "--fast-list",
        ]
        if bandwidth_limit:
            cmd.extend(["--bwlimit", bandwidth_limit])
        try:
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            log_fh = open(str(LOG_FILE), 'a')
            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            log_fh.write(f"\n[{ts}] Syncing {dir_name}/ ({i+1}/{len(dirs_to_sync)})\n")
            log_fh.flush()
            try:
                # rclone writes its log/stats lines to stderr; stream them live.
                for line in proc.stderr:
                    stripped = line.strip()
                    if not stripped:
                        continue
                    # Clean rclone output: strip syslog prefix "<6>INFO :" → clean message
                    clean = re.sub(r'^<\d+>', '', stripped)  # strip syslog priority
                    clean = re.sub(r'^(INFO|DEBUG|ERROR|NOTICE)\s*:\s*', '', clean).strip()
                    # Detect progress/stats lines (with % or chk# markers)
                    is_progress = ('%,' in stripped and '/s' in stripped) or 'chk#' in stripped
                    # Write cleaned output to log file
                    if is_progress:
                        # Progress line — only log the latest (overwrite style)
                        log_fh.write(f" [progress] {clean}\n")
                    elif ': Copied' in stripped or ': Moved' in stripped:
                        m = re.search(r'(.+?):\s*(Copied|Moved)\s*(.*)', clean)
                        if m:
                            fname = m.group(1).strip()
                            action = m.group(2)
                            detail = m.group(3).strip()
                            short = fname.split('/')[-1] if '/' in fname else fname
                            log_fh.write(f" {action}: {short} {detail}\n")
                        else:
                            log_fh.write(f" {clean}\n")
                    elif 'Checks:' in clean and 'Transferred:' in clean:
                        # Summary line at end of dir sync
                        log_fh.write(f" {clean}\n")
                    elif clean:
                        log_fh.write(f" {clean}\n")
                    log_fh.flush()
                    # Update live stats for frontend
                    if is_progress:
                        stats = _parse_rclone_stats(stripped)
                        if stats:
                            # Determine phase from stats
                            phase = 'checking'
                            if stats.get('files_done', 0) > 0 or stats.get('files_total', 0) > 0:
                                phase = 'transferring'
                            elif stats.get('pct', 0) > 0:
                                phase = 'transferring'
                            _update_sync_job(job_id, {'transfer_stats': stats, 'phase': phase})
                    if ': Copied' in stripped or ': Moved' in stripped:
                        m = re.search(r'INFO\s*:\s*(.+?):\s*(?:Copied|Moved)', stripped)
                        if m:
                            fname = m.group(1).strip()
                            short = fname.split('/')[-1] if '/' in fname else fname
                            _update_sync_job(job_id, {'current_file': short, 'phase': 'transferring'})
            finally:
                log_fh.close()
            proc.wait()
            ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            if proc.returncode != 0:
                err_msg = f"Sync failed for {dir_name}: exit code {proc.returncode}"
                errors.append(err_msg)
                logger.error(err_msg)
                with open(str(LOG_FILE), 'a') as f:
                    f.write(f"[{ts}] ERROR: {dir_name}/ failed (exit {proc.returncode})\n")
            else:
                with open(str(LOG_FILE), 'a') as f:
                    f.write(f"[{ts}] Completed {dir_name}/\n")
        except Exception as e:
            errors.append(f"Sync error for {dir_name}: {e}")
            logger.error(f"Sync error for {dir_name}: {e}")
    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    status_msg = 'completed' if not errors else f'completed with {len(errors)} error(s)'
    with open(str(LOG_FILE), 'a') as f:
        # BUGFIX: the original f-string ran status and count together
        # ("Sync completed11 directories"); separate them with a colon.
        f.write(f"\n[{ts}] Sync {status_msg}: {len(dirs_to_sync)} directories\n{'='*60}\n")
    completed_status = 'completed' if not errors else 'completed_with_errors'
    _update_sync_job(job_id, {
        'status': completed_status,
        'completed_at': datetime.now().isoformat(),
        'dirs_synced': len(dirs_to_sync),
        'current_dir': None,
        'error': '; '.join(errors) if errors else None,
    })
    # Write status file for daemon/dashboard
    _write_manual_sync_status(completed_status, errors)
    _cleanup_old_sync_jobs()
    # Auto-refresh cloud storage stats after sync
    _trigger_cloud_status_refresh()
def _write_manual_sync_status(status: str, errors: list):
    """Write sync completion info to status file for dashboard consumption."""
    try:
        # Merge into the daemon's existing status rather than clobbering it.
        payload = _read_status_file()
        payload['last_manual_sync'] = datetime.now().isoformat()
        payload['last_manual_sync_status'] = status
        if errors:
            payload['last_manual_sync_errors'] = errors
        STATUS_FILE.write_text(json.dumps(payload))
    except OSError as e:
        logger.warning(f"Failed to write status file: {e}")
# ============================================================================
# API ENDPOINTS
# ============================================================================
@router.get("/config")
async def get_config(user=Depends(get_current_user)):
    """Get cloud backup configuration (sensitive fields masked)."""
    return _mask_config(_load_config())
@router.put("/config")
async def update_config(update: CloudBackupConfigUpdate, user=Depends(require_admin)):
    """Save cloud backup configuration and regenerate rclone.conf.

    Masked sensitive values ("****") keep their stored values. Enabling the
    backup requires all credential fields to be present (400 otherwise).
    """
    merged = _merge_config_update(_load_config(), update.model_dump(exclude_unset=True))
    # Validate required fields if enabling
    if merged.get('enabled'):
        required = ['endpoint', 'bucket', 'key_id', 'application_key']
        if merged.get('encryption_enabled'):
            required += ['encryption_password', 'encryption_salt']
        missing = [name for name in required if not merged.get(name)]
        if missing:
            from fastapi import HTTPException
            raise HTTPException(status_code=400, detail=f"Missing required fields: {', '.join(missing)}")
    _save_config(merged)
    # Regenerate rclone config if we have credentials
    if merged.get('key_id') and merged.get('application_key'):
        try:
            _regenerate_rclone_config(merged)
        except Exception as e:
            logger.error(f"Failed to regenerate rclone config: {e}")
            return {"status": "saved", "rclone_config": "failed", "error": str(e), "config": _mask_config(merged)}
    return {"status": "saved", "rclone_config": "updated", "config": _mask_config(merged)}
@router.get("/status")
async def get_status(user=Depends(get_current_user)):
    """Get sync status including service state, last sync time, storage info.
    Reads job progress from the daemon's status file. When the daemon is
    running a manual sync (has job_id), its progress fields take priority
    over in-memory job tracking.
    """
    config = _load_config()
    service_status = _get_service_status()
    daemon_status = _read_status_file()
    active_job = None
    daemon_job_id = daemon_status.get('job_id')
    # Priority 1: Daemon is actively syncing a job — build active_job from daemon progress
    if daemon_job_id and daemon_status.get('state') == 'syncing':
        active_job = {
            'id': daemon_job_id,
            'status': 'running',
            'started_at': daemon_status.get('started_at'),
            'completed_at': None,
            'dirs_synced': daemon_status.get('dirs_synced', 0),
            'dirs_total': daemon_status.get('dirs_total', 0),
            'current_dir': daemon_status.get('current_dir'),
            'current_file': daemon_status.get('current_file'),
            'transfer_stats': daemon_status.get('transfer_stats'),
            'phase': daemon_status.get('phase', 'syncing'),
            'error': None,
        }
        # Also update in-memory job to stay in sync
        if _get_sync_job(daemon_job_id):
            _update_sync_job(daemon_job_id, {
                'dirs_synced': daemon_status.get('dirs_synced', 0),
                'dirs_total': daemon_status.get('dirs_total', 0),
                'current_dir': daemon_status.get('current_dir'),
                'current_file': daemon_status.get('current_file'),
                'transfer_stats': daemon_status.get('transfer_stats'),
                'phase': daemon_status.get('phase', 'syncing'),
            })
    # Priority 2: Daemon has a completed job — update in-memory job
    elif daemon_job_id and daemon_status.get('completed_at'):
        in_mem = _get_sync_job(daemon_job_id)
        # Only transition once: the in-memory record still says 'running'.
        if in_mem and in_mem.get('status') == 'running':
            completed_status = 'completed' if not daemon_status.get('sync_error') else 'completed_with_errors'
            _update_sync_job(daemon_job_id, {
                'status': completed_status,
                'completed_at': daemon_status.get('completed_at'),
                'dirs_synced': daemon_status.get('dirs_synced', 0),
                'dirs_total': daemon_status.get('dirs_total', 0),
                'error': daemon_status.get('sync_error'),
                'phase': 'completed',
                'current_dir': None,
                'current_file': None,
                'transfer_stats': None,
            })
            # Sync just finished in daemon — refresh cloud storage stats
            _trigger_cloud_status_refresh()
    # Priority 3: Fall back to in-memory jobs (legacy / pending trigger)
    if active_job is None:
        with _jobs_lock:
            for job in _sync_jobs.values():
                if job['status'] == 'running':
                    active_job = dict(job)
                    break
    # Determine effective state
    is_syncing = active_job is not None or daemon_status.get('state') == 'syncing'
    if active_job is not None:
        effective_state = 'syncing'
    elif daemon_status.get('state'):
        effective_state = daemon_status['state']
    elif service_status == 'active':
        effective_state = 'idle'
    else:
        effective_state = 'idle'
    return {
        "configured": bool(config.get('key_id') and config.get('bucket')),
        "enabled": config.get('enabled', False),
        "service_status": service_status,
        "syncing": is_syncing,
        "last_sync": daemon_status.get('last_sync', daemon_status.get('last_manual_sync')),
        "last_sync_status": daemon_status.get('last_sync_status', daemon_status.get('last_manual_sync_status')),
        "files_watched": daemon_status.get('files_watched', 0),
        "storage_used": daemon_status.get('storage_used'),
        "error_count": daemon_status.get('error_count', 0),
        "last_error": daemon_status.get('last_error'),
        "cooldown_remaining": daemon_status.get('cooldown_remaining', 0),
        "active_job": active_job,
        "state": effective_state,
    }
@router.post("/sync")
async def trigger_sync(user=Depends(get_current_user)):
    """Trigger a manual sync by delegating to the daemon via trigger file.
    The sync runs inside the daemon process (cloud-backup-sync.service),
    so it survives API/scheduler restarts. Progress is tracked via the
    daemon's status file.

    Raises 400 when unconfigured, 503 when the daemon is not active, and
    409 when a sync or trigger is already pending.
    """
    from fastapi import HTTPException
    config = _load_config()
    if not config.get('key_id') or not config.get('bucket'):
        raise HTTPException(status_code=400, detail="Cloud backup not configured")
    # Check daemon is running
    service_status = _get_service_status()
    if service_status != "active":
        raise HTTPException(status_code=503, detail=f"Cloud backup daemon is not running (status: {service_status})")
    # Check daemon not already syncing (via status file)
    daemon_status = _read_status_file()
    if daemon_status.get('state') == 'syncing':
        raise HTTPException(status_code=409, detail="A sync is already in progress")
    # Check no trigger file already pending
    if TRIGGER_FILE.exists():
        raise HTTPException(status_code=409, detail="A sync trigger is already pending")
    # Check for in-memory running jobs (race condition guard)
    with _jobs_lock:
        for job in _sync_jobs.values():
            if job['status'] == 'running':
                raise HTTPException(status_code=409, detail="A sync is already in progress")
    job_id = str(uuid.uuid4())
    # Write trigger file for daemon to pick up
    trigger_data = {
        "type": "sync",
        "job_id": job_id,
        "requested_at": datetime.now().isoformat(),
        "requested_by": "api",
    }
    try:
        TRIGGER_FILE.write_text(json.dumps(trigger_data))
    except OSError as e:
        raise HTTPException(status_code=500, detail=f"Failed to write trigger file: {e}")
    # Create in-memory job tracker for immediate API response
    _create_sync_job(job_id)
    return {"status": "started", "job_id": job_id}
@router.post("/pause")
async def pause_sync(user=Depends(get_current_user)):
"""Stop the cloud backup sync service."""
try:
result = subprocess.run(
["systemctl", "stop", SERVICE_NAME],
capture_output=True, text=True, timeout=30
)
if result.returncode != 0:
return {"status": "error", "message": result.stderr.strip()}
return {"status": "stopped"}
except Exception as e:
logger.error(f"Failed to stop sync service: {e}")
return {"status": "error", "message": str(e)}
@router.post("/resume")
async def resume_sync(user=Depends(get_current_user)):
"""Start the cloud backup sync service."""
try:
result = subprocess.run(
["systemctl", "start", SERVICE_NAME],
capture_output=True, text=True, timeout=30
)
if result.returncode != 0:
return {"status": "error", "message": result.stderr.strip()}
return {"status": "started"}
except Exception as e:
logger.error(f"Failed to start sync service: {e}")
return {"status": "error", "message": str(e)}
def _format_progress_summary(progress_line: str) -> str:
"""Format a [progress] line into a clean summary like ' Checked 108,024 / 118,056 files — 100% — 46.985 KiB/s'."""
parts = []
chk_m = re.search(r'chk#(\d+)/(\d+)', progress_line)
if chk_m:
parts.append(f"Checked {int(chk_m.group(1)):,} / {int(chk_m.group(2)):,} files")
xfr_m = re.search(r'xfr#(\d+)/(\d+)', progress_line)
if xfr_m:
parts.append(f"Transferred {int(xfr_m.group(1)):,} / {int(xfr_m.group(2)):,} files")
pct_m = re.search(r'(\d+)%', progress_line)
if pct_m:
parts.append(f"{pct_m.group(1)}%")
speed_m = re.search(r'([\d.]+\s*\S*i?B/s)', progress_line)
if speed_m:
parts.append(speed_m.group(1))
return f" {''.join(parts)}" if parts else ""
@router.get("/logs")
async def get_logs(lines: int = 200, user=Depends(get_current_user)):
"""Get last N lines from the cloud backup sync log, cleaned and summarized."""
if not LOG_FILE.exists():
return {"logs": "", "lines": 0}
try:
result = subprocess.run(
["tail", "-n", str(min(lines * 10, 5000)), str(LOG_FILE)],
capture_output=True, text=True, timeout=10
)
# Whitelist-based parser: only emit lines matching known useful patterns.
# Everything else (daemon internals, tracebacks, config noise) is dropped.
output = []
last_progress = None
# Regex to strip daemon/universal-logger timestamp prefixes from any line
_prefix_re = re.compile(
r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}[.,]\d+\s+'
r'(?:\[MediaDownloader\.\w+\]\s+)?'
r'(?:\[(?:INFO|DEBUG|WARNING|ERROR)\]\s+)?'
r'(?:\[Core\]\s+\[(?:INFO|DEBUG|WARNING|ERROR)\]\s+)?'
)
for line in result.stdout.splitlines():
raw = line.strip()
if not raw:
continue
# Skip universal-logger formatted lines entirely (they duplicate
# the file_handler lines with a different format)
if '[MediaDownloader.' in raw:
continue
# Strip daemon log prefix to get the clean message
stripped = _prefix_re.sub('', raw).strip()
if not stripped:
continue
# ── Capture rclone progress lines (emitted by daemon Popen loop) ──
if re.match(r'^\s*\[progress\]', stripped):
last_progress = stripped
continue
# ── Section headers: "[2026-03-22 01:13:58] Syncing paid/ (1/11)" ──
if re.match(r'^\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\]\s+', stripped):
# Flush pending progress summary before new section
if last_progress:
output.append(_format_progress_summary(last_progress))
last_progress = None
output.append(stripped)
continue
# ── Separator lines: "===..." ──
if re.match(r'^={10,}$', stripped):
output.append(stripped)
continue
# ── File transfer lines: "fname: Copied (new, replaced existing)" ──
m_xfer = re.match(r'^(.+?):\s*(Copied|Moved|Deleted)\s*(.*)', stripped)
if m_xfer:
fname = m_xfer.group(1).strip()
action = m_xfer.group(2)
detail = m_xfer.group(3).strip()
short = fname.split('/')[-1] if '/' in fname else fname
detail_short = f" ({detail})" if detail and detail != '(new)' else ''
output.append(f" {action}: {short}{detail_short}")
continue
# ── Our cleaned format: " Copied: filename.jpg" ──
m_clean = re.match(r'^\s*(Copied|Moved|Deleted):\s*(.+)', stripped)
if m_clean:
output.append(f" {m_clean.group(1)}: {m_clean.group(2).strip()}")
continue
# ── Completion lines from manual log writes: "Completed dir/" ──
if re.match(r'^Completed\s+\S+/', stripped):
output.append(stripped)
continue
# ── rclone sync errors ──
if re.match(r'^rclone sync failed', stripped):
if not any(stripped in prev for prev in output[-3:]):
output.append(f" ERROR: {stripped}")
continue
if re.match(r'^ERROR:', stripped):
if not any(stripped in prev for prev in output[-3:]):
output.append(stripped)
continue
# ── Sync summary lines: "Sync completed — 11 directories" ──
if re.match(r'^Sync (completed|completed with \d+ error)', stripped):
output.append(stripped)
continue
# ── pg_dump error (deduplicated) ──
if 'pg_dump failed' in stripped:
if not any('pg_dump failed' in prev for prev in output[-5:]):
output.append(f" ERROR: {stripped}")
continue
# Everything else is dropped (daemon internals, tracebacks, config noise, etc.)
# Flush trailing progress for active directory
if last_progress:
output.append(_format_progress_summary(last_progress))
# Return last N lines
final = output[-lines:] if len(output) > lines else output
log_content = '\n'.join(final)
return {"logs": log_content, "lines": len(final)}
except Exception as e:
return {"logs": f"Error reading logs: {e}", "lines": 0}
@router.post("/test")
async def test_connection(user=Depends(get_current_user)):
"""Test the rclone connection by listing the remote."""
config = _load_config()
if not config.get('key_id') or not config.get('application_key'):
from fastapi import HTTPException
raise HTTPException(status_code=400, detail="Cloud backup credentials not configured")
# Ensure rclone config is up to date
try:
_regenerate_rclone_config(config)
except Exception as e:
return {"status": "error", "message": f"Failed to write rclone config: {e}"}
# Test with rclone lsd
bucket = config.get('bucket', '')
encryption_enabled = config.get('encryption_enabled', True)
if encryption_enabled:
test_remote = f"{RCLONE_CRYPT_NAME}:"
else:
test_remote = f"{RCLONE_REMOTE_NAME}:{bucket}"
try:
result = subprocess.run(
["rclone", "lsd", test_remote, "--config", str(RCLONE_CONF_PATH), "--max-depth", "1"],
capture_output=True, text=True, timeout=30
)
if result.returncode == 0:
dirs = [line.strip().split()[-1] for line in result.stdout.strip().splitlines() if line.strip()]
return {"status": "success", "message": "Connection successful", "remote_dirs": dirs}
else:
return {"status": "error", "message": result.stderr.strip() or "Connection failed"}
except subprocess.TimeoutExpired:
return {"status": "error", "message": "Connection timed out (30s)"}
except Exception as e:
return {"status": "error", "message": str(e)}
@router.get("/job/{job_id}")
async def get_job(job_id: str, user=Depends(get_current_user)):
"""Get the status of a specific sync job.
Checks both in-memory tracking and the daemon's status file.
Daemon status takes priority when the job is actively running there.
"""
# Check daemon status file for this job
daemon_status = _read_status_file()
daemon_job_id = daemon_status.get('job_id')
if daemon_job_id == job_id:
if daemon_status.get('state') == 'syncing':
# Daemon is actively running this job
return {
'id': job_id,
'status': 'running',
'started_at': daemon_status.get('started_at'),
'completed_at': None,
'dirs_synced': daemon_status.get('dirs_synced', 0),
'dirs_total': daemon_status.get('dirs_total', 0),
'current_dir': daemon_status.get('current_dir'),
'current_file': daemon_status.get('current_file'),
'transfer_stats': daemon_status.get('transfer_stats'),
'phase': daemon_status.get('phase', 'syncing'),
'error': None,
}
elif daemon_status.get('completed_at'):
# Daemon completed this job
has_error = bool(daemon_status.get('sync_error'))
return {
'id': job_id,
'status': 'completed_with_errors' if has_error else 'completed',
'started_at': daemon_status.get('started_at'),
'completed_at': daemon_status.get('completed_at'),
'dirs_synced': daemon_status.get('dirs_synced', 0),
'dirs_total': daemon_status.get('dirs_total', 0),
'current_dir': None,
'current_file': None,
'transfer_stats': None,
'phase': 'completed',
'error': daemon_status.get('sync_error'),
}
# Fall back to in-memory job tracking
job = _get_sync_job(job_id)
if not job:
from fastapi import HTTPException
raise HTTPException(status_code=404, detail="Job not found")
return job
@router.get("/available-dirs")
async def get_available_dirs(user=Depends(get_current_user)):
"""List available directories under /opt/immich for backup."""
dirs = []
if IMMICH_BASE.is_dir():
for entry in sorted(IMMICH_BASE.iterdir()):
if entry.is_dir() and not entry.name.startswith('.'):
dirs.append(entry.name)
return {"dirs": dirs}
# On-disk cache for cloud storage stats: written by _run_cloud_status_query,
# read by the /cloud-status endpoint so it can respond instantly.
CLOUD_STATUS_CACHE_FILE = Path("/tmp/cloud_storage_status.json")
CLOUD_STATUS_REFRESH_INTERVAL = 6 * 3600  # 6 hours
# "running" flag for the background stats worker; guarded by _cloud_status_lock.
_cloud_status_query: Dict[str, Any] = {"running": False}
_cloud_status_lock = Lock()
def _trigger_cloud_status_refresh():
    """Kick off a cloud storage stats refresh in a daemon thread.

    No-op when a refresh worker is already running; the "running" flag is
    claimed under the lock before the worker thread is spawned.
    """
    import threading
    with _cloud_status_lock:
        if not _cloud_status_query["running"]:
            _cloud_status_query["running"] = True
            threading.Thread(target=_run_cloud_status_query, daemon=True).start()
def _cloud_status_periodic_loop():
    """Background loop: refresh cloud storage stats every 6 hours.

    On startup it inspects the cache file's `queried_at` timestamp and, if
    the cached data is missing or older than the refresh interval, triggers
    an immediate refresh — otherwise process restarts would keep resetting
    the 6-hour countdown indefinitely.
    """
    import time as _time
    # Startup staleness check (after a short delay so the API fully starts).
    try:
        _time.sleep(30)
        needs_refresh = True
        if CLOUD_STATUS_CACHE_FILE.exists():
            ts = json.loads(CLOUD_STATUS_CACHE_FILE.read_text()).get('queried_at')
            if ts:
                from datetime import datetime as _dt
                elapsed = (_dt.now() - _dt.fromisoformat(ts)).total_seconds()
                needs_refresh = elapsed > CLOUD_STATUS_REFRESH_INTERVAL
        if needs_refresh:
            cfg = _load_config()
            if cfg.get('key_id') and cfg.get('bucket'):
                logger.info("Cloud status cache is stale on startup, refreshing...")
                _trigger_cloud_status_refresh()
    except Exception as e:
        logger.warning(f"Startup cloud status staleness check error: {e}")
    # Steady-state: one refresh per interval, only while backup is configured.
    while True:
        _time.sleep(CLOUD_STATUS_REFRESH_INTERVAL)
        try:
            cfg = _load_config()
            if cfg.get('key_id') and cfg.get('bucket'):
                logger.info("Periodic cloud status refresh starting...")
                _trigger_cloud_status_refresh()
        except Exception as e:
            logger.warning(f"Periodic cloud status refresh error: {e}")
# Start the periodic refresh thread.
# NOTE: this runs at import time; the thread is daemon=True so it never
# blocks interpreter shutdown.
import threading as _threading
_cloud_status_timer = _threading.Thread(target=_cloud_status_periodic_loop, daemon=True)
_cloud_status_timer.start()
def _run_cloud_status_query():
    """Background worker: query rclone for cloud storage stats and cache to disk.

    Writes JSON snapshots to CLOUD_STATUS_CACHE_FILE: first an initial
    payload with ``query_status: "running"`` (so the frontend can show
    progress), then — in the ``finally`` block — the final payload marked
    ``"complete"`` or ``"error"``. Always clears the module-level
    ``_cloud_status_query["running"]`` flag on exit, under
    ``_cloud_status_lock``.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed
    config = _load_config()
    bucket = config.get('bucket', '')
    encryption_enabled = config.get('encryption_enabled', True)
    provider = config.get('provider', 'b2')
    endpoint = config.get('endpoint', '')
    # The crypt remote already targets the bucket; plain remote needs it appended.
    if encryption_enabled:
        remote = f"{RCLONE_CRYPT_NAME}:"
    else:
        remote = f"{RCLONE_REMOTE_NAME}:{bucket}"
    # Result skeleton; totals/directories are filled in on success.
    result = {
        "provider": provider,
        "bucket": bucket,
        "endpoint": endpoint,
        "encryption": encryption_enabled,
        "total_files": None,
        "total_size": None,
        "total_size_human": None,
        "directories": [],
        "error": None,
        "queried_at": datetime.now().isoformat(),
        "query_status": "running",
    }
    def _rclone_size(target: str, timeout: int = 120) -> dict:
        # Run `rclone size --json` for one remote path; on failure return a
        # dict with a truncated "error" message instead of raising.
        r = subprocess.run(
            ["rclone", "size", target, "--config", str(RCLONE_CONF_PATH), "--json"],
            capture_output=True, text=True, timeout=timeout
        )
        if r.returncode == 0:
            return json.loads(r.stdout)
        return {"error": r.stderr.strip()[:100]}
    def _get_dir_size(d: str) -> dict:
        # Size one top-level directory; never raises (errors/timeouts are
        # encoded in the returned dict so one bad dir doesn't sink the query).
        dir_remote = f"{remote}{d}" if remote.endswith(":") else f"{remote}/{d}"
        try:
            dd = _rclone_size(dir_remote, timeout=90)
            if "error" in dd:
                return {"name": d, "files": 0, "size": 0, "size_human": "error", "error": dd["error"]}
            return {
                "name": d,
                "files": dd.get("count", 0),
                "size": dd.get("bytes", 0),
                "size_human": _format_bytes(dd.get("bytes", 0)),
            }
        except subprocess.TimeoutExpired:
            return {"name": d, "files": 0, "size": 0, "size_human": "timeout"}
        except Exception as e:
            return {"name": d, "files": 0, "size": 0, "size_human": "error", "error": str(e)[:100]}
    try:
        # Save initial "running" state so frontend can show progress
        CLOUD_STATUS_CACHE_FILE.write_text(json.dumps(result))
        # Enumerate top-level directories on the remote.
        lsd_result = subprocess.run(
            ["rclone", "lsd", remote, "--config", str(RCLONE_CONF_PATH), "--max-depth", "1"],
            capture_output=True, text=True, timeout=30
        )
        if lsd_result.returncode != 0:
            result["error"] = lsd_result.stderr.strip()[:200]
            result["query_status"] = "error"
            CLOUD_STATUS_CACHE_FILE.write_text(json.dumps(result))
            return
        # `rclone lsd` output: last whitespace-separated token is the dir name.
        dir_names = []
        for line in lsd_result.stdout.strip().splitlines():
            parts = line.strip().split()
            if parts:
                dir_names.append(parts[-1])
        # Size directories concurrently (up to 6 rclone processes at once).
        with ThreadPoolExecutor(max_workers=6) as executor:
            dir_futures = {executor.submit(_get_dir_size, d): d for d in sorted(dir_names)}
            dir_infos = []
            for future in as_completed(dir_futures, timeout=300):
                dir_infos.append(future.result())
        # Compute totals from per-directory results (avoids slow rclone size on entire remote)
        total_files = sum(d.get("files", 0) for d in dir_infos)
        total_bytes = sum(d.get("size", 0) for d in dir_infos)
        result["total_files"] = total_files
        result["total_size"] = total_bytes
        result["total_size_human"] = _format_bytes(total_bytes)
        result["directories"] = dir_infos
        result["query_status"] = "complete"
    except Exception as e:
        result["error"] = str(e)[:200]
        result["query_status"] = "error"
    finally:
        # Always persist the final snapshot and release the "running" flag.
        result["queried_at"] = datetime.now().isoformat()
        CLOUD_STATUS_CACHE_FILE.write_text(json.dumps(result))
        with _cloud_status_lock:
            _cloud_status_query["running"] = False
@router.get("/cloud-status")
async def get_cloud_status(user=Depends(get_current_user)):
"""Return cached cloud storage stats. Returns instantly."""
config = _load_config()
if not config.get('key_id') or not config.get('bucket'):
from fastapi import HTTPException
raise HTTPException(status_code=400, detail="Cloud backup not configured")
# Return cached results if available
cached = {}
try:
if CLOUD_STATUS_CACHE_FILE.exists():
cached = json.loads(CLOUD_STATUS_CACHE_FILE.read_text())
except (json.JSONDecodeError, OSError):
pass
with _cloud_status_lock:
is_running = _cloud_status_query["running"]
if cached:
cached["query_running"] = is_running
return cached
return {
"provider": config.get('provider', 'b2'),
"bucket": config.get('bucket', ''),
"endpoint": config.get('endpoint', ''),
"encryption": config.get('encryption_enabled', True),
"total_files": None,
"total_size": None,
"total_size_human": None,
"directories": [],
"error": None,
"queried_at": None,
"query_status": "never",
"query_running": is_running,
}
@router.post("/cloud-status/refresh")
async def refresh_cloud_status(background_tasks: BackgroundTasks, user=Depends(get_current_user)):
"""Trigger a background refresh of cloud storage stats."""
config = _load_config()
if not config.get('key_id') or not config.get('bucket'):
from fastapi import HTTPException
raise HTTPException(status_code=400, detail="Cloud backup not configured")
with _cloud_status_lock:
if _cloud_status_query["running"]:
return {"status": "already_running"}
_cloud_status_query["running"] = True
background_tasks.add_task(_run_cloud_status_query)
return {"status": "started"}
def _format_bytes(b: int) -> str:
"""Format bytes into human-readable string."""
if b is None or b == 0:
return "0 B"
for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB']:
if abs(b) < 1024.0:
return f"{b:.1f} {unit}" if unit != 'B' else f"{b} B"
b /= 1024.0
return f"{b:.1f} EB"