1714 lines
79 KiB
Python
Executable File
1714 lines
79 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Move Module - Handles file moving and timestamp management
|
|
"""
|
|
|
|
import os
|
|
import shutil
|
|
import time
|
|
import pwd
|
|
import grp
|
|
import gc
|
|
import sqlite3
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Optional, List, Dict, Union, Any
|
|
from modules.base_module import LoggingMixin
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('MoveManager') # For module-level functions
|
|
|
|
# Import UnifiedDatabase for file hash deduplication
|
|
try:
|
|
from .unified_database import UnifiedDatabase
|
|
except ImportError:
|
|
try:
|
|
from unified_database import UnifiedDatabase
|
|
except ImportError:
|
|
UnifiedDatabase = None
|
|
logger.warning("UnifiedDatabase not available - file hash deduplication disabled")
|
|
|
|
# Import date utilities for EXIF timestamp updates
|
|
try:
|
|
from modules.date_utils import DateHandler
|
|
DATE_UTILS_AVAILABLE = True
|
|
except ImportError:
|
|
try:
|
|
from .date_utils import DateHandler
|
|
DATE_UTILS_AVAILABLE = True
|
|
except ImportError:
|
|
DATE_UTILS_AVAILABLE = False
|
|
logger.debug("DateHandler not available - EXIF updates disabled")
|
|
|
|
|
|
def _extract_exif_date(filepath: Path) -> Optional[datetime]:
|
|
"""Extract date from EXIF metadata using exiftool
|
|
|
|
Checks DateTimeOriginal, CreateDate, then DateCreated in order of preference.
|
|
Returns None if no valid date found.
|
|
"""
|
|
import subprocess
|
|
try:
|
|
result = subprocess.run([
|
|
'exiftool', '-s', '-s', '-s',
|
|
'-DateTimeOriginal', '-CreateDate', '-DateCreated',
|
|
str(filepath)
|
|
], capture_output=True, text=True, timeout=10)
|
|
|
|
if result.returncode == 0 and result.stdout.strip():
|
|
# exiftool returns dates in format "YYYY:MM:DD HH:MM:SS"
|
|
for line in result.stdout.strip().split('\n'):
|
|
date_str = line.strip()
|
|
if date_str and date_str != '-':
|
|
try:
|
|
# Try parsing EXIF date format
|
|
return datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
|
|
except ValueError:
|
|
try:
|
|
# Try alternate format without time
|
|
return datetime.strptime(date_str, "%Y:%m:%d")
|
|
except ValueError:
|
|
continue
|
|
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
pass
|
|
except Exception as e:
|
|
logger.debug(f"EXIF date extraction failed for {filepath}: {e}")
|
|
return None
|
|
|
|
|
|
# Pushover notifier handled by scheduler/notification system
|
|
|
|
class MoveManager(LoggingMixin):
|
|
"""Manages file moves with proper timestamp handling and notifications"""
|
|
|
|
def __init__(self, log_callback=None, notifier=None, unified_db=None, face_recognition_enabled=True, on_download_complete=None, event_emitter=None):
|
|
"""
|
|
Initialize the MoveManager
|
|
|
|
Args:
|
|
log_callback: Optional callback function for logging (tag, level, message)
|
|
notifier: Optional PushoverNotifier instance for batch notifications
|
|
unified_db: Optional UnifiedDatabase instance for file hash deduplication
|
|
face_recognition_enabled: Enable face recognition filtering (default: True)
|
|
on_download_complete: Optional callback when downloads complete (platform, source, count)
|
|
event_emitter: Optional ScraperEventEmitter instance for real-time scraping monitor
|
|
"""
|
|
# Initialize logging via mixin
|
|
self._init_logger('MoveManager', log_callback, default_module='Move')
|
|
|
|
self.notifier = notifier
|
|
self.unified_db = unified_db
|
|
self.face_recognition_enabled = face_recognition_enabled
|
|
self.on_download_complete = on_download_complete
|
|
self.event_emitter = event_emitter
|
|
self.current_session = {} # Store session context for event emission
|
|
self.face_module = None
|
|
self.stats = {
|
|
'moved': 0,
|
|
'skipped': 0,
|
|
'failed': 0,
|
|
'duplicates': 0, # Track files skipped due to duplicate content
|
|
'review_queue': 0 # Track files moved to review queue (no face match)
|
|
}
|
|
# Track moved files for batch notifications
|
|
self.batch_context = None
|
|
self.moved_files = []
|
|
self.review_queue_files = [] # Separate tracking for review queue items
|
|
self.repost_queue = [] # Queue for repost detection processing (done after moves complete)
|
|
self._last_move_had_face_recognition = False # Tracks if last move_file ran face recognition
|
|
|
|
# Background thread pool for non-blocking post-processing (thumbnails, dimensions)
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix='move_bg')
|
|
|
|
# Initialize face recognition if enabled
|
|
if self.face_recognition_enabled and self.unified_db:
|
|
try:
|
|
from modules.face_recognition_module import FaceRecognitionModule
|
|
self.face_module = FaceRecognitionModule(
|
|
unified_db=self.unified_db,
|
|
log_callback=lambda msg, lvl: self.log(msg, lvl)
|
|
)
|
|
self.log("Face recognition module initialized", "info")
|
|
except Exception as e:
|
|
self.log(f"Failed to initialize face recognition: {e}", "warning")
|
|
self.face_module = None
|
|
|
|
# Cache face recognition settings (read once from DB, reused per file)
|
|
self._face_recognition_tolerance = None
|
|
self._review_path = None
|
|
self._face_recognition_settings_cache = None # Raw JSON settings blob
|
|
self._video_face_settings_cache = None # Parsed video settings dict
|
|
|
|
# Initialize activity status manager for real-time updates
|
|
from modules.activity_status import get_activity_manager
|
|
self.activity_manager = get_activity_manager(unified_db)
|
|
|
|
# Log database status for debugging hash deduplication
|
|
if self.unified_db:
|
|
self.log("MoveManager initialized with UnifiedDatabase - hash deduplication ENABLED", "info")
|
|
else:
|
|
self.log("MoveManager initialized WITHOUT UnifiedDatabase - hash deduplication DISABLED", "warning")
|
|
|
|
if not UnifiedDatabase:
|
|
self.log("UnifiedDatabase class not imported - hash deduplication DISABLED", "warning")
|
|
|
|
def set_session_context(self, platform: str, account: str, session_id: str):
|
|
"""Set context for current scraping session for event emission
|
|
|
|
Args:
|
|
platform: Platform name (e.g., 'instagram', 'snapchat')
|
|
account: Account/username being scraped
|
|
session_id: Unique session identifier for this scraping run
|
|
"""
|
|
self.current_session = {
|
|
'platform': platform,
|
|
'account': account,
|
|
'session_id': session_id
|
|
}
|
|
self.log(f"Session context set: {platform}/{account} ({session_id})", "debug")
|
|
|
|
def release_models(self):
|
|
"""
|
|
Release ML models to free memory.
|
|
Call this after batch processing to prevent OOM in long-running services.
|
|
Models will be lazy-loaded again when needed.
|
|
"""
|
|
if self.face_module is not None:
|
|
self.face_module.release_model()
|
|
self.log("Face recognition model released to free memory", "info")
|
|
gc.collect()
|
|
|
|
def _is_image_file(self, file_path: Path) -> bool:
|
|
"""Check if file is an image (not video)"""
|
|
image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.heic'}
|
|
return file_path.suffix.lower() in image_extensions
|
|
|
|
def _is_video_file(self, file_path: Path) -> bool:
|
|
"""Check if file is a video"""
|
|
video_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.m4v'}
|
|
return file_path.suffix.lower() in video_extensions
|
|
|
|
    def _generate_thumbnail_cache(self, file_path: Path, content_hash: Optional[str] = None) -> None:
        """
        Pre-generate and cache a 300x300 JPEG thumbnail for a media file.

        This speeds up page loading by having thumbnails ready in the
        thumbnails.db SQLite database. The cache key is the SHA256 of the
        file CONTENT (not the path), so cached thumbnails survive file
        moves (e.g., to the recycle bin).

        Best-effort: every failure is logged at debug level and swallowed;
        thumbnail problems never fail the move.

        Args:
            file_path: Path to the media file
            content_hash: Optional pre-computed SHA256 content hash (saves
                re-reading the file when the caller already hashed it)
        """
        try:
            import hashlib
            import io
            import subprocess
            from PIL import Image

            # NOTE(review): DB location is hard-coded; assumed to match the
            # web layer's thumbnail store - confirm if the install path moves.
            thumb_db_path = Path('/opt/media-downloader/database/thumbnails.db')

            # Calculate content hash for cache key (survives file moves)
            if content_hash:
                file_hash = content_hash
            else:
                # Calculate SHA256 of file content, streamed in 64 KiB chunks
                sha256 = hashlib.sha256()
                with open(file_path, 'rb') as f:
                    for chunk in iter(lambda: f.read(65536), b''):
                        sha256.update(chunk)
                file_hash = sha256.hexdigest()

            file_mtime = file_path.stat().st_mtime

            # Check if already cached (by content hash, so renames/moves hit)
            conn = sqlite3.connect(str(thumb_db_path), timeout=10.0)
            try:
                cursor = conn.cursor()
                cursor.execute("SELECT 1 FROM thumbnails WHERE file_hash = ?", (file_hash,))
                if cursor.fetchone():
                    return  # Already cached

                # Generate thumbnail
                thumbnail_data = None
                max_size = (300, 300)

                if self._is_video_file(file_path):
                    # Video thumbnail: grab the frame at t=1s via ffmpeg and
                    # pipe it to PIL as MJPEG (no temp file on disk)
                    try:
                        result = subprocess.run([
                            'ffmpeg', '-i', str(file_path),
                            '-ss', '00:00:01.000', '-vframes', '1',
                            '-f', 'image2pipe', '-vcodec', 'mjpeg', '-'
                        ], capture_output=True, timeout=10)

                        if result.returncode == 0 and result.stdout:
                            img = Image.open(io.BytesIO(result.stdout))
                            img.thumbnail(max_size, Image.Resampling.LANCZOS)
                            if img.mode != 'RGB':
                                img = img.convert('RGB')
                            buffer = io.BytesIO()
                            img.save(buffer, format='JPEG', quality=85)
                            thumbnail_data = buffer.getvalue()
                    except Exception as e:
                        self.log(f"Video thumbnail generation failed: {e}", "debug")

                elif self._is_image_file(file_path):
                    # Image thumbnail
                    try:
                        img = Image.open(file_path)
                        img.thumbnail(max_size, Image.Resampling.LANCZOS)

                        # Flatten transparency onto a white background so the
                        # result is valid JPEG (no alpha channel support)
                        if img.mode in ('RGBA', 'LA', 'P'):
                            background = Image.new('RGB', img.size, (255, 255, 255))
                            if img.mode == 'P':
                                img = img.convert('RGBA')
                            background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
                            img = background

                        buffer = io.BytesIO()
                        img.save(buffer, format='JPEG', quality=85)
                        thumbnail_data = buffer.getvalue()
                    except Exception as e:
                        self.log(f"Image thumbnail generation failed: {e}", "debug")

                # Cache the thumbnail (REPLACE handles concurrent writers)
                if thumbnail_data:
                    from datetime import datetime
                    cursor.execute("""
                        INSERT OR REPLACE INTO thumbnails
                        (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                        VALUES (?, ?, ?, ?, ?)
                    """, (file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
                    conn.commit()
                    self.log(f"Cached thumbnail: {file_path.name}", "debug")
            finally:
                conn.close()

        except Exception as e:
            self.log(f"Thumbnail cache generation failed for {file_path.name}: {e}", "debug")
|
|
|
|
    def _record_file_inventory_bg(self, destination: Path, source_name_file: str,
                                  platform: str, source_name: str,
                                  moved_to_review: bool, file_hash: Optional[str] = None,
                                  timestamp: Optional[datetime] = None) -> None:
        """Record file in inventory table with dimensions (runs in background thread).

        Best-effort: every failure is logged at debug level and swallowed so
        bookkeeping problems never fail the move itself.

        Args:
            destination: Final destination path
            source_name_file: Original source filename
            platform: Platform name
            source_name: Source/username
            moved_to_review: Whether file was sent to review queue
            file_hash: Pre-computed file hash (or None)
            timestamp: Post date extracted from filename (or None to use current time)
        """
        # NOTE(review): assumes self.unified_db is set; a None db would only
        # surface as a debug log via the outer except - confirm callers.
        try:
            location = 'review' if moved_to_review else 'final'
            # Anything without an image extension is classified as video
            content_type_val = 'image' if self._is_image_file(destination) else 'video'

            from datetime import datetime, timezone
            file_stat = destination.stat()
            # Prefer the post date parsed from the filename; otherwise "now" in UTC
            if timestamp:
                inventory_created = timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
            else:
                inventory_created = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

            # Look up download method from downloads table (Instagram only)
            method = None
            if platform == 'instagram':
                try:
                    with self.unified_db.get_connection() as conn:
                        cursor = conn.cursor()
                        cursor.execute('''
                            SELECT method FROM downloads
                            WHERE filename = ? AND platform = 'instagram'
                            LIMIT 1
                        ''', (source_name_file,))
                        row = cursor.fetchone()
                        if row and row[0]:
                            method = row[0]
                except Exception:
                    pass  # method stays None; it is optional metadata

            # Extract media dimensions: PIL for images, ffprobe for videos
            width, height = None, None
            try:
                if content_type_val == 'image':
                    from PIL import Image
                    with Image.open(destination) as img:
                        width, height = img.size
                elif content_type_val == 'video':
                    import subprocess
                    result = subprocess.run(
                        ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
                         '-show_entries', 'stream=width,height', '-of', 'csv=p=0',
                         str(destination)],
                        capture_output=True, text=True, timeout=10
                    )
                    if result.returncode == 0 and result.stdout.strip():
                        parts = result.stdout.strip().split(',')
                        if len(parts) >= 2:
                            width, height = int(parts[0]), int(parts[1])
            except Exception as e:
                self.log(f"Could not extract dimensions for {destination.name}: {e}", "debug")

            self.unified_db.upsert_file_inventory(
                file_path=str(destination),
                filename=destination.name,
                platform=platform,
                source=source_name,
                content_type=content_type_val,
                file_size=file_stat.st_size,
                file_hash=file_hash if file_hash else None,
                width=width,
                height=height,
                location=location,
                created_date=inventory_created,
                method=method
            )
            self.log(f"Recorded in file_inventory: {destination.name} (location={location})", "debug")

            # Queue for discovery scans (embedding) for files going to final
            if location == 'final':
                try:
                    self.unified_db.queue_file_for_discovery(
                        str(destination),
                        scan_types=['embedding'],
                        priority=5
                    )
                    self.log(f"Queued {destination.name} for discovery scans", "debug")
                except Exception as e:
                    self.log(f"Failed to queue for discovery: {e}", "debug")

        except Exception as e:
            self.log(f"Failed to record in file_inventory: {e}", "debug")
|
|
|
|
def _get_face_recognition_settings(self) -> dict:
|
|
"""Get face recognition settings from database (cached per session).
|
|
|
|
Returns:
|
|
dict: Face recognition settings, or empty dict if unavailable.
|
|
"""
|
|
if self._face_recognition_settings_cache is not None:
|
|
return self._face_recognition_settings_cache
|
|
|
|
if self.unified_db:
|
|
try:
|
|
import json
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'face_recognition'")
|
|
result = cursor.fetchone()
|
|
if result:
|
|
self._face_recognition_settings_cache = json.loads(result[0])
|
|
return self._face_recognition_settings_cache
|
|
except Exception as e:
|
|
self.log(f"Failed to read face recognition settings: {e}", "debug")
|
|
|
|
self._face_recognition_settings_cache = {}
|
|
return self._face_recognition_settings_cache
|
|
|
|
def _get_face_recognition_tolerance(self, is_video: bool = False, source: str = None) -> float:
|
|
"""Get face recognition tolerance from database settings
|
|
|
|
Args:
|
|
is_video: If True, get video_tolerance instead of image tolerance
|
|
source: Optional source username (e.g., 'evalongoria') for source-based tolerance
|
|
|
|
Returns:
|
|
float: Tolerance value (0.0-1.0), defaults to 0.15 for images, 0.30 for videos
|
|
"""
|
|
settings = self._get_face_recognition_settings()
|
|
|
|
if settings:
|
|
# Get base tolerance (video or image)
|
|
if is_video:
|
|
base_tolerance = float(settings.get('video_tolerance', settings.get('tolerance', 0.30)))
|
|
else:
|
|
base_tolerance = float(settings.get('tolerance', 0.15))
|
|
|
|
# Check for source-specific tolerance
|
|
if source:
|
|
source_tolerances = settings.get('source_tolerances', {})
|
|
if source in source_tolerances:
|
|
source_tolerance = float(source_tolerances[source])
|
|
if is_video:
|
|
tolerance = max(source_tolerance, base_tolerance)
|
|
self.log(f"Using max of source ({source_tolerance}) and video ({base_tolerance}) tolerance for '{source}': {tolerance}", "debug")
|
|
else:
|
|
tolerance = source_tolerance
|
|
self.log(f"Using source-based tolerance for '{source}': {tolerance}", "debug")
|
|
return tolerance
|
|
|
|
return base_tolerance
|
|
|
|
# Default: 0.15 for images, 0.30 for videos
|
|
return 0.30 if is_video else 0.15
|
|
|
|
def _get_review_path(self) -> str:
|
|
"""Get review path from database settings
|
|
|
|
Returns:
|
|
str: Review path, defaults to '/opt/immich/review' if not set
|
|
"""
|
|
# Return cached value if available
|
|
if self._review_path is not None:
|
|
return self._review_path
|
|
|
|
# Try to read from database
|
|
if self.unified_db:
|
|
try:
|
|
import json
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'face_recognition'")
|
|
result = cursor.fetchone()
|
|
if result:
|
|
settings = json.loads(result[0])
|
|
review_path = settings.get('review_path', '/opt/immich/review')
|
|
self._review_path = review_path
|
|
self.log(f"Using review path: {review_path}", "debug")
|
|
return review_path
|
|
except Exception as e:
|
|
self.log(f"Failed to read review path from database: {e}", "debug")
|
|
|
|
# Default to /opt/immich/review if not found
|
|
self._review_path = '/opt/immich/review'
|
|
return '/opt/immich/review'
|
|
|
|
def _get_video_face_recognition_settings(self) -> dict:
|
|
"""Get video face recognition settings from database (cached per session).
|
|
|
|
Returns:
|
|
dict: Settings with enable_video_recognition, video_face_frames, frame_positions
|
|
"""
|
|
if self._video_face_settings_cache is not None:
|
|
return self._video_face_settings_cache
|
|
|
|
default_settings = {
|
|
'enable_video_recognition': False,
|
|
'video_face_frames': 3,
|
|
'frame_positions': [0.1, 0.5, 0.9]
|
|
}
|
|
|
|
settings = self._get_face_recognition_settings()
|
|
if settings:
|
|
self._video_face_settings_cache = {
|
|
'enable_video_recognition': bool(settings.get('enable_video_recognition', False)),
|
|
'video_face_frames': int(settings.get('video_face_frames', 3)),
|
|
'frame_positions': settings.get('frame_positions', [0.1, 0.5, 0.9])
|
|
}
|
|
else:
|
|
self._video_face_settings_cache = default_settings
|
|
|
|
return self._video_face_settings_cache
|
|
|
|
def _apply_ownership(self, file_path: Path) -> None:
|
|
"""Apply configured file ownership to moved files
|
|
|
|
Args:
|
|
file_path: Path to file to change ownership
|
|
"""
|
|
if not self.unified_db:
|
|
return
|
|
|
|
try:
|
|
# Get ownership settings from database
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'file_ownership'")
|
|
result = cursor.fetchone()
|
|
|
|
if not result:
|
|
return # No ownership configured
|
|
|
|
import json
|
|
settings = json.loads(result[0])
|
|
|
|
# Check if ownership is enabled
|
|
if not settings.get('enabled', False):
|
|
return
|
|
|
|
owner = settings.get('owner', '').strip()
|
|
group = settings.get('group', '').strip()
|
|
|
|
if not owner and not group:
|
|
return # Nothing to change
|
|
|
|
# Get current file ownership
|
|
stat_info = file_path.stat()
|
|
uid = stat_info.st_uid
|
|
gid = stat_info.st_gid
|
|
|
|
# Resolve owner username to UID
|
|
if owner:
|
|
try:
|
|
uid = pwd.getpwnam(owner).pw_uid
|
|
except KeyError:
|
|
self.log(f"Warning: User '{owner}' not found, skipping ownership change", "warning")
|
|
return
|
|
|
|
# Resolve group name to GID
|
|
if group:
|
|
try:
|
|
gid = grp.getgrnam(group).gr_gid
|
|
except KeyError:
|
|
self.log(f"Warning: Group '{group}' not found, skipping ownership change", "warning")
|
|
return
|
|
|
|
# Change ownership
|
|
os.chown(file_path, uid, gid)
|
|
self.log(f"Changed ownership: {file_path.name} → {owner}:{group}", "debug")
|
|
|
|
except PermissionError:
|
|
self.log(f"Permission denied changing ownership of {file_path.name} (run as root/sudo)", "warning")
|
|
except Exception as e:
|
|
self.log(f"Failed to apply ownership to {file_path.name}: {e}", "debug")
|
|
|
|
def _apply_ownership_to_path(self, dir_path: Path) -> None:
|
|
"""Apply configured file ownership to a directory and all parent directories
|
|
|
|
Args:
|
|
dir_path: Path to directory to change ownership (and parents)
|
|
"""
|
|
if not self.unified_db or not dir_path.exists():
|
|
return
|
|
|
|
try:
|
|
# Get ownership settings from database
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'file_ownership'")
|
|
result = cursor.fetchone()
|
|
|
|
if not result:
|
|
return # No ownership configured
|
|
|
|
import json
|
|
settings = json.loads(result[0])
|
|
|
|
# Check if ownership is enabled
|
|
if not settings.get('enabled', False):
|
|
return
|
|
|
|
owner = settings.get('owner', '').strip()
|
|
group = settings.get('group', '').strip()
|
|
|
|
if not owner and not group:
|
|
return # Nothing to change
|
|
|
|
# Resolve owner username to UID
|
|
uid = -1 # -1 means don't change
|
|
gid = -1 # -1 means don't change
|
|
|
|
if owner:
|
|
try:
|
|
uid = pwd.getpwnam(owner).pw_uid
|
|
except KeyError:
|
|
self.log(f"Warning: User '{owner}' not found", "warning")
|
|
return
|
|
|
|
if group:
|
|
try:
|
|
gid = grp.getgrnam(group).gr_gid
|
|
except KeyError:
|
|
self.log(f"Warning: Group '{group}' not found", "warning")
|
|
return
|
|
|
|
# Apply ownership to this directory and all parents up to /opt/immich/md
|
|
base_path = Path("/opt/immich/md")
|
|
current_path = dir_path
|
|
|
|
while current_path != current_path.parent: # Stop at filesystem root
|
|
# Stop if we've gone above the base path
|
|
if not current_path.is_relative_to(base_path):
|
|
break
|
|
|
|
# Change ownership of this directory
|
|
try:
|
|
os.chown(current_path, uid, gid)
|
|
self.log(f"Changed directory ownership: {current_path} → {owner}:{group}", "debug")
|
|
except PermissionError:
|
|
self.log(f"Permission denied changing ownership of {current_path}", "debug")
|
|
break
|
|
except Exception as e:
|
|
self.log(f"Failed to change ownership of {current_path}: {e}", "debug")
|
|
break
|
|
|
|
# Move to parent directory
|
|
current_path = current_path.parent
|
|
|
|
except Exception as e:
|
|
self.log(f"Failed to apply ownership to directories: {e}", "debug")
|
|
|
|
def move_file(self,
|
|
source: Union[str, Path],
|
|
destination: Union[str, Path],
|
|
timestamp: Optional[datetime] = None,
|
|
preserve_if_no_timestamp: bool = True,
|
|
content_type: str = None) -> bool:
|
|
"""
|
|
Move a single file with optional timestamp setting
|
|
|
|
Args:
|
|
source: Source file path
|
|
destination: Destination file path
|
|
timestamp: Optional datetime to set on the file
|
|
preserve_if_no_timestamp: If True and no timestamp provided, preserve original
|
|
content_type: Optional content type for tracking
|
|
|
|
Returns:
|
|
True if successful, False otherwise
|
|
"""
|
|
source = Path(source)
|
|
destination = Path(destination)
|
|
|
|
if not source.exists():
|
|
self.log(f"Source file not found: {source}", "error")
|
|
self.stats['failed'] += 1
|
|
return False
|
|
|
|
if destination.exists():
|
|
self.log(f"Skipping existing file: {destination.name}", "info")
|
|
self.stats['skipped'] += 1
|
|
|
|
# Update database even when skipping to prevent re-downloads
|
|
if self.unified_db and self.batch_context:
|
|
platform = self.batch_context.get('platform')
|
|
source_name = self.batch_context.get('source')
|
|
if platform and source_name:
|
|
try:
|
|
# Update database with final path (using existing destination)
|
|
updated = self.unified_db.update_file_location_by_filename(
|
|
filename=source.name,
|
|
platform=platform,
|
|
source=source_name,
|
|
final_path=str(destination)
|
|
)
|
|
if updated:
|
|
self.log(f"Updated database for skipped file: {destination.name}", "debug")
|
|
except Exception as e:
|
|
self.log(f"Failed to update database for skipped file: {e}", "debug")
|
|
|
|
return False
|
|
|
|
# Initialize file_hash before conditional block (used later in upsert_file_inventory)
|
|
file_hash = None
|
|
|
|
# File hash deduplication check (checks downloads, recycle_bin, and file_inventory)
|
|
if self.unified_db and UnifiedDatabase:
|
|
try:
|
|
self.activity_manager.update_status(f"Checking file hash: {source.name}")
|
|
file_hash = UnifiedDatabase.get_file_hash(str(source))
|
|
self.log(f"[HASH_CHECK] Calculated hash for {source.name}: {file_hash[:16] if file_hash else 'None'}...", "debug")
|
|
if file_hash:
|
|
# Check if this file hash already exists anywhere (downloads, recycle_bin, file_inventory)
|
|
is_duplicate = self.unified_db.is_file_hash_downloaded(file_hash)
|
|
self.log(f"[HASH_CHECK] Hash exists in system: {is_duplicate}", "debug")
|
|
|
|
if is_duplicate:
|
|
# Get details from downloads table first (most common case)
|
|
existing = self.unified_db.get_download_by_file_hash(file_hash)
|
|
|
|
if existing:
|
|
existing_path = existing.get('file_path', 'unknown')
|
|
existing_filename = existing.get('filename', 'unknown')
|
|
existing_platform = existing.get('platform', 'unknown')
|
|
existing_source = existing.get('source', 'unknown')
|
|
|
|
self.log(f"[HASH_CHECK] Existing file: {existing_filename} at {existing_path}", "debug")
|
|
self.log(f"[HASH_CHECK] Source file: {source.name} at {source}", "debug")
|
|
self.log(f"[HASH_CHECK] Paths match: {str(source) == existing_path}", "debug")
|
|
|
|
if existing_path and str(source) != existing_path:
|
|
# Check if existing file actually exists at that path
|
|
existing_file_exists = Path(existing_path).exists() if existing_path != 'unknown' else False
|
|
self.log(f"[HASH_CHECK] Existing file exists on disk: {existing_file_exists}", "debug")
|
|
|
|
if existing_file_exists:
|
|
# Duplicate found - keep first file, delete new
|
|
self.log(
|
|
f"Skipping duplicate file (same content): {source.name} "
|
|
f"[Already exists: {existing_filename} from {existing_platform}/{existing_source} at {existing_path}]",
|
|
"warning"
|
|
)
|
|
self.stats['duplicates'] += 1
|
|
|
|
# Delete the duplicate source file to save space
|
|
try:
|
|
source.unlink()
|
|
self.log(f"Deleted duplicate: {source.name}", "debug")
|
|
except Exception as e:
|
|
self.log(f"Failed to delete duplicate {source.name}: {e}", "warning")
|
|
|
|
return False
|
|
else:
|
|
# Existing file path in DB but file doesn't exist - allow move and update DB
|
|
self.log(f"[HASH_CHECK] Existing record found but file missing at {existing_path}, allowing move to proceed", "debug")
|
|
else:
|
|
self.log(f"[HASH_CHECK] Source and existing paths match or no existing path, allowing move", "debug")
|
|
else:
|
|
# Hash exists in recycle_bin or file_inventory but not downloads
|
|
self.log(f"Skipping duplicate file: {source.name} [Hash exists in recycle bin or review queue]", "warning")
|
|
self.stats['duplicates'] += 1
|
|
|
|
# Delete the duplicate source file
|
|
try:
|
|
source.unlink()
|
|
self.log(f"Deleted duplicate: {source.name}", "debug")
|
|
except Exception as e:
|
|
self.log(f"Failed to delete duplicate {source.name}: {e}", "warning")
|
|
|
|
return False
|
|
except Exception as e:
|
|
# Don't fail the move if hash check fails, just log and continue
|
|
self.log(f"File hash check failed for {source.name}: {e}", "debug")
|
|
|
|
# Instagram perceptual duplicate detection (visually similar content with overlays)
|
|
if self.batch_context:
|
|
platform = self.batch_context.get('platform', '')
|
|
source_name = self.batch_context.get('source', '')
|
|
content_type = self.batch_context.get('content_type', '')
|
|
|
|
# Check if this is an Instagram downloader (instagram, fastdl, imginn, instaloader, toolzu)
|
|
platform_lower = platform.lower()
|
|
is_instagram = any(ig_platform in platform_lower for ig_platform in ['instagram', 'fastdl', 'imginn', 'instaloader', 'toolzu'])
|
|
|
|
if is_instagram:
|
|
self.activity_manager.update_status(f"Checking perceptual hash: {source.name}")
|
|
perceptual_result = self._check_perceptual_duplicate(str(source), platform, source_name, content_type)
|
|
if perceptual_result == "skip":
|
|
# This file is a lower quality perceptual duplicate - skip it
|
|
self.log(f"Skipping perceptual duplicate (has overlays or lower quality): {source.name}", "info")
|
|
return False
|
|
# If perceptual_result is None or file_path, continue processing
|
|
|
|
# Track if file is being moved to review queue
|
|
moved_to_review = False
|
|
original_intended_path = str(destination)
|
|
queued_for_repost_check = False # Track if we added this file to repost queue
|
|
|
|
# Instagram story repost detection (only if enabled in settings)
|
|
if self._is_instagram_story(source) and self.batch_context:
|
|
source_username = self.batch_context.get('source', '')
|
|
platform = self.batch_context.get('platform', '')
|
|
|
|
# Check if this is an Instagram downloader (instagram, fastdl, imginn, instaloader, toolzu)
|
|
platform_lower = platform.lower()
|
|
is_instagram = any(ig_platform in platform_lower for ig_platform in ['instagram', 'fastdl', 'imginn', 'instaloader', 'toolzu'])
|
|
|
|
if is_instagram:
|
|
self.activity_manager.update_status(f"Checking repost detection: {source.name}")
|
|
result = self._check_repost_and_replace(str(source), source_username, str(destination))
|
|
if result == "queued":
|
|
queued_for_repost_check = True
|
|
elif result:
|
|
source = Path(result)
|
|
self.log(f"Replaced repost with original: {source.name}", "info")
|
|
|
|
# Update batch_context to reflect the ORIGINAL source
|
|
# This ensures database records it correctly as from the original user
|
|
replacement_filename = source.name.lower()
|
|
|
|
# Extract original username from filename (e.g., globalgiftfoundation_20251109_...)
|
|
import re
|
|
match = re.match(r'^([a-z0-9._]+)_\d{8}', replacement_filename)
|
|
if match:
|
|
original_source = match.group(1)
|
|
self.batch_context['source'] = original_source
|
|
|
|
# Update content_type based on filename
|
|
if 'story' in replacement_filename:
|
|
self.batch_context['content_type'] = 'story'
|
|
elif 'post' in replacement_filename:
|
|
self.batch_context['content_type'] = 'post'
|
|
elif 'reel' in replacement_filename:
|
|
self.batch_context['content_type'] = 'reel'
|
|
|
|
self.log(f"Updated batch_context: source={original_source}", "debug")
|
|
|
|
# Face recognition check
|
|
if self.face_module:
|
|
# Check if video face recognition is enabled
|
|
video_settings = self._get_video_face_recognition_settings()
|
|
|
|
# Process videos with multi-frame face recognition (if enabled)
|
|
if self._is_video_file(source):
|
|
if video_settings['enable_video_recognition']:
|
|
try:
|
|
# Get source username from batch context for source-based tolerance
|
|
source_username = self.batch_context.get('source') if self.batch_context else None
|
|
tolerance = self._get_face_recognition_tolerance(is_video=True, source=source_username)
|
|
frame_positions = video_settings['frame_positions']
|
|
|
|
self.log(f"Checking video with {len(frame_positions)} frames (tolerance: {tolerance}): {source.name}", "info")
|
|
self.activity_manager.update_status(f"Checking facial recognition: {source.name}")
|
|
|
|
# Use multi-frame checking for videos
|
|
result = self.face_module.check_video_multiframe(
|
|
str(source),
|
|
tolerance=tolerance,
|
|
positions=frame_positions
|
|
)
|
|
|
|
# Store result for event emission
|
|
self._last_face_result = result
|
|
|
|
# Log face recognition result to database
|
|
# Use best_candidate for matched_person when no match found (to show who was closest)
|
|
if self.unified_db:
|
|
try:
|
|
self.unified_db.log_face_recognition_scan(
|
|
file_path=str(destination),
|
|
has_match=result['has_match'],
|
|
matched_person=result.get('person_name') or result.get('best_candidate'),
|
|
confidence=result.get('confidence'),
|
|
face_count=result.get('face_count', 0),
|
|
scan_type='auto'
|
|
)
|
|
except Exception as log_err:
|
|
self.log(f"Failed to log face recognition result: {log_err}", "debug")
|
|
|
|
if not result['has_match']:
|
|
# No face match - move to review queue
|
|
review_path = self._get_review_path()
|
|
base_path = Path("/opt/immich/md")
|
|
if destination.is_relative_to(base_path):
|
|
relative_path = destination.relative_to(base_path)
|
|
review_dest = Path(review_path) / relative_path
|
|
else:
|
|
review_dest = Path(review_path) / source.name
|
|
|
|
best_info = f" (best: {result.get('best_candidate')} at {result.get('confidence', 0):.1%})" if result.get('best_candidate') else ""
|
|
self.log(f"No face match in video {source.name} (checked {result['frames_checked']} frames){best_info} - moving to review queue", "info")
|
|
destination = review_dest
|
|
moved_to_review = True
|
|
self.stats['review_queue'] += 1
|
|
else:
|
|
# Face matched - continue to original destination
|
|
self.log(f"Face match in video: {result['person_name']} ({result['confidence']:.1%}, frame {result['best_frame_index']+1}/{result['frames_checked']}) - proceeding to final destination", "info")
|
|
|
|
# Track that face recognition ran (batch loop handles pacing)
|
|
self._last_move_had_face_recognition = True
|
|
|
|
except Exception as e:
|
|
self.log(f"Video face recognition failed for {source.name}: {e} - moving to review queue", "warning")
|
|
# Re-check file existence - if file was deleted, skip the move
|
|
if not source.exists():
|
|
self.log(f"Source file no longer exists after video face check failure: {source.name}", "warning")
|
|
self.stats['failed'] += 1
|
|
return False
|
|
# On error, move to review queue as safety measure
|
|
review_path = self._get_review_path()
|
|
base_path = Path("/opt/immich/md")
|
|
if destination.is_relative_to(base_path):
|
|
relative_path = destination.relative_to(base_path)
|
|
review_dest = Path(review_path) / relative_path
|
|
else:
|
|
review_dest = Path(review_path) / source.name
|
|
destination = review_dest
|
|
moved_to_review = True
|
|
self.stats['review_queue'] += 1
|
|
else:
|
|
# Video face recognition disabled - skip and move to review queue
|
|
review_path = self._get_review_path()
|
|
base_path = Path("/opt/immich/md")
|
|
if destination.is_relative_to(base_path):
|
|
relative_path = destination.relative_to(base_path)
|
|
review_dest = Path(review_path) / relative_path
|
|
else:
|
|
review_dest = Path(review_path) / source.name
|
|
|
|
self.log(f"Video face recognition disabled - moving {source.name} to review queue", "debug")
|
|
destination = review_dest
|
|
moved_to_review = True
|
|
self.stats['review_queue'] += 1
|
|
|
|
# Process images with face recognition
|
|
elif self._is_image_file(source):
|
|
try:
|
|
is_video = False # Only processing images now
|
|
# Get source username from batch context for source-based tolerance
|
|
source_username = self.batch_context.get('source') if self.batch_context else None
|
|
tolerance = self._get_face_recognition_tolerance(is_video=False, source=source_username)
|
|
self.log(f"Checking image (tolerance: {tolerance}): {source.name}", "debug")
|
|
result = self.face_module.check_image(str(source), tolerance=tolerance, is_video=is_video)
|
|
|
|
# Store result for event emission
|
|
self._last_face_result = result
|
|
|
|
# Log face recognition result to database
|
|
# Use best_candidate for matched_person when no match found (to show who was closest)
|
|
if self.unified_db:
|
|
try:
|
|
self.unified_db.log_face_recognition_scan(
|
|
file_path=str(destination), # Use final destination path
|
|
has_match=result['has_match'],
|
|
matched_person=result.get('person_name') or result.get('best_candidate'),
|
|
confidence=result.get('confidence'),
|
|
face_count=result.get('face_count', 0),
|
|
scan_type='auto'
|
|
)
|
|
except Exception as log_err:
|
|
self.log(f"Failed to log face recognition result: {log_err}", "debug")
|
|
|
|
if not result['has_match']:
|
|
# No face match - move to review queue instead
|
|
# Maintain folder structure in review queue
|
|
review_path = self._get_review_path()
|
|
base_path = Path("/opt/immich/md")
|
|
if destination.is_relative_to(base_path):
|
|
# Get relative path from base
|
|
relative_path = destination.relative_to(base_path)
|
|
# Recreate under review directory
|
|
review_dest = Path(review_path) / relative_path
|
|
else:
|
|
# Fallback to flat structure if not under base path
|
|
review_dest = Path(review_path) / source.name
|
|
|
|
file_type = "video" if is_video else "image"
|
|
best_info = f" (best: {result.get('best_candidate')} at {result.get('confidence', 0):.1%})" if result.get('best_candidate') else ""
|
|
self.log(f"No face match for {file_type} {source.name}{best_info} - moving to review queue at {review_dest}", "info")
|
|
|
|
# Update destination to review path
|
|
destination = review_dest
|
|
moved_to_review = True
|
|
self.stats['review_queue'] += 1
|
|
else:
|
|
# Face matched - continue to original destination
|
|
file_type = "video" if is_video else "image"
|
|
self.log(f"Face match in {file_type}: {result['person_name']} ({result['confidence']:.1%}) - proceeding to final destination", "debug")
|
|
except Exception as e:
|
|
# Don't fail the move if face check fails, just log and continue
|
|
self.log(f"Face recognition check failed for {source.name}: {e}", "debug")
|
|
# Re-check file existence - if file was deleted, skip the move
|
|
if not source.exists():
|
|
self.log(f"Source file no longer exists after face check failure: {source.name}", "warning")
|
|
self.stats['failed'] += 1
|
|
return False
|
|
finally:
|
|
# Track that face recognition ran
|
|
self._last_move_had_face_recognition = True
|
|
|
|
try:
|
|
# Ensure destination directory exists
|
|
destination.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Apply ownership to created directories
|
|
self._apply_ownership_to_path(destination.parent)
|
|
|
|
if preserve_if_no_timestamp and not timestamp:
|
|
# Try to extract EXIF date from source file first
|
|
# This fixes issues where filesystem mtime is wrong but EXIF has correct date
|
|
exif_date = None
|
|
if self._is_image_file(source):
|
|
exif_date = _extract_exif_date(source)
|
|
if exif_date:
|
|
self.log(f"Extracted EXIF date for {source.name}: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
|
|
|
|
# Copy file with metadata preserved (single pass)
|
|
shutil.copy2(str(source), str(destination))
|
|
|
|
if exif_date:
|
|
# Override timestamps with EXIF date (more accurate than filesystem)
|
|
if DATE_UTILS_AVAILABLE:
|
|
DateHandler.update_file_timestamps(destination, exif_date)
|
|
self.log(f"Set timestamps from EXIF: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
|
|
else:
|
|
timestamp_unix = exif_date.timestamp()
|
|
os.utime(destination, (timestamp_unix, timestamp_unix))
|
|
self.log(f"Set filesystem timestamp from EXIF: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
|
|
else:
|
|
# Copy file content
|
|
shutil.copy(str(source), str(destination))
|
|
|
|
# Set timestamp if provided - use DateHandler for comprehensive update
|
|
# This sets EXIF metadata (including MetadataDate for Immich) AND filesystem times
|
|
if timestamp:
|
|
if DATE_UTILS_AVAILABLE:
|
|
# Use centralized date handler for EXIF + filesystem timestamps
|
|
DateHandler.update_file_timestamps(destination, timestamp)
|
|
self.log(f"Set all timestamps to {timestamp.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
|
|
else:
|
|
# Fallback to filesystem-only timestamps
|
|
timestamp_unix = timestamp.timestamp()
|
|
os.utime(destination, (timestamp_unix, timestamp_unix))
|
|
self.log(f"Set filesystem timestamp to {timestamp.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
|
|
|
|
self.log(f"Moved: {source.name} → {destination.name}", "info")
|
|
self.stats['moved'] += 1
|
|
self.activity_manager.update_status(f"Moving images: {source.name}")
|
|
|
|
# Pre-generate thumbnail in background (non-blocking)
|
|
self._bg_executor.submit(self._generate_thumbnail_cache, destination, file_hash)
|
|
|
|
# Apply file ownership if configured
|
|
self._apply_ownership(destination)
|
|
|
|
# Update database with final file location and hash
|
|
if self.unified_db and self.batch_context:
|
|
platform = self.batch_context.get('platform')
|
|
source_name = self.batch_context.get('source')
|
|
content_type_ctx = self.batch_context.get('content_type')
|
|
|
|
# For tagged content, extract actual poster from filename (e.g., "rtlliving_20251124_..." -> "rtlliving")
|
|
if content_type_ctx == 'tagged' and source.name:
|
|
import re
|
|
# Use date pattern to correctly extract usernames that may contain underscores
|
|
# Pattern: username_YYYYMMDD_...
|
|
date_pattern = re.match(r'^(.+?)_(\d{8})_', source.name)
|
|
if date_pattern:
|
|
extracted_source = date_pattern.group(1).lower()
|
|
# Validate: Instagram usernames are 1-30 chars, alphanumeric + underscore + period
|
|
if extracted_source and re.match(r'^[a-z0-9_.]{1,30}$', extracted_source):
|
|
if extracted_source != source_name:
|
|
self.log(f"Tagged content: using poster @{extracted_source} instead of @{source_name}", "debug")
|
|
source_name = extracted_source
|
|
else:
|
|
self.log(f"Tagged content: extracted '{extracted_source}' doesn't look like valid username, keeping @{source_name}", "debug")
|
|
|
|
if platform and source_name:
|
|
try:
|
|
# Update database with final path and hash
|
|
updated = self.unified_db.update_file_location_by_filename(
|
|
filename=source.name,
|
|
platform=platform,
|
|
source=source_name,
|
|
final_path=str(destination)
|
|
)
|
|
|
|
# Batch all path updates in a single transaction
|
|
with self.unified_db.get_connection(for_write=True) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# If moved to review queue, add intended_path to metadata
|
|
if updated and moved_to_review:
|
|
import json
|
|
cursor.execute('''
|
|
SELECT metadata FROM downloads
|
|
WHERE filename = ? AND platform = ? AND source = ?
|
|
''', (source.name, platform, source_name))
|
|
row = cursor.fetchone()
|
|
|
|
if row:
|
|
metadata = json.loads(row['metadata']) if row['metadata'] else {}
|
|
metadata['intended_path'] = original_intended_path
|
|
cursor.execute('''
|
|
UPDATE downloads
|
|
SET metadata = ?
|
|
WHERE filename = ? AND platform = ? AND source = ?
|
|
''', (json.dumps(metadata), source.name, platform, source_name))
|
|
self.log(f"Saved intended destination to metadata: {original_intended_path}", "debug")
|
|
|
|
# Update perceptual hash path
|
|
try:
|
|
cursor.execute('''
|
|
UPDATE instagram_perceptual_hashes
|
|
SET file_path = ?
|
|
WHERE filename = ? AND platform = ? AND source = ?
|
|
''', (str(destination), source.name, platform, source_name))
|
|
if cursor.rowcount > 0:
|
|
self.log(f"Updated perceptual hash path: {destination}", "debug")
|
|
except Exception as e:
|
|
self.log(f"Failed to update perceptual hash path: {e}", "debug")
|
|
|
|
# Update face recognition scans path
|
|
try:
|
|
cursor.execute('''
|
|
UPDATE face_recognition_scans
|
|
SET file_path = ?
|
|
WHERE file_path = ?
|
|
''', (str(destination), original_intended_path))
|
|
if cursor.rowcount > 0:
|
|
self.log(f"Updated face recognition scan path: {original_intended_path} -> {destination}", "debug")
|
|
except Exception as e:
|
|
self.log(f"Failed to update face recognition scan path: {e}", "debug")
|
|
|
|
if updated:
|
|
self.log(f"Updated database with final location: {destination}", "debug")
|
|
else:
|
|
self.log(f"No database record found to update for {source.name}", "debug")
|
|
|
|
except Exception as e:
|
|
self.log(f"Failed to update database location: {e}", "debug")
|
|
|
|
# Record in file_inventory in background (dimensions + discovery queue)
|
|
# This avoids blocking the move pipeline on ffprobe/PIL calls
|
|
self._bg_executor.submit(
|
|
self._record_file_inventory_bg,
|
|
destination, source.name, platform, source_name,
|
|
moved_to_review, file_hash, timestamp
|
|
)
|
|
|
|
# Track for batch notification if in batch mode
|
|
# Auto-detect content type from path if not provided
|
|
if not content_type and self.batch_context:
|
|
# Prefer the batch-level content_type (set by caller who knows what's being downloaded)
|
|
# This prevents path-based inference from overriding e.g. 'media' with 'post'
|
|
# just because the temp dir contains "posts/"
|
|
batch_ct = self.batch_context.get('content_type')
|
|
if batch_ct:
|
|
# Normalize plural forms to singular for proper notification grammar
|
|
_SINGULAR = {'posts': 'post', 'stories': 'story', 'reels': 'reel',
|
|
'videos': 'video', 'images': 'image', 'items': 'item'}
|
|
content_type = _SINGULAR.get(batch_ct, batch_ct)
|
|
else:
|
|
# No batch content_type set, try to infer from source path
|
|
path_str = str(source).lower()
|
|
if 'story' in path_str or 'stories' in path_str:
|
|
content_type = 'story'
|
|
elif 'reel' in path_str:
|
|
content_type = 'reel'
|
|
elif 'post' in path_str:
|
|
content_type = 'post'
|
|
elif 'video' in path_str:
|
|
content_type = 'video'
|
|
elif 'image' in path_str or 'photo' in path_str:
|
|
content_type = 'image'
|
|
|
|
# Track with full path for image attachment in notifications
|
|
self.track_moved_file(str(destination), content_type=content_type, is_review=moved_to_review)
|
|
|
|
# Update repost queue with actual final destination (in case it changed due to review queue)
|
|
if queued_for_repost_check and self.repost_queue:
|
|
# Find the most recent queue entry for this file and update with actual final destination
|
|
for entry in reversed(self.repost_queue):
|
|
if Path(entry['file_path']).name == destination.name:
|
|
# Update with actual final path (might be review queue)
|
|
entry['file_path'] = str(destination)
|
|
self.log(f"Updated repost queue entry with final destination: {destination.name}", "debug")
|
|
break
|
|
|
|
# Emit WebSocket event for real-time scraping monitor
|
|
if self.event_emitter and self.current_session:
|
|
try:
|
|
# Determine media type
|
|
media_type = 'video' if self._is_video_file(destination) else 'image'
|
|
|
|
# Determine destination type (media/review/recycle)
|
|
dest_str = str(destination).lower()
|
|
if moved_to_review or '/review' in dest_str:
|
|
destination_type = 'review'
|
|
elif '/recycle' in dest_str:
|
|
destination_type = 'recycle'
|
|
else:
|
|
destination_type = 'media'
|
|
|
|
# Generate thumbnail URL
|
|
import urllib.parse
|
|
thumbnail_url = f"/api/files/thumbnail?path={urllib.parse.quote(str(destination))}"
|
|
|
|
# Get face match info if available
|
|
face_match = {'matched': False}
|
|
if hasattr(self, '_last_face_result') and self._last_face_result:
|
|
if self._last_face_result.get('has_match'):
|
|
face_match = {
|
|
'matched': True,
|
|
'person_name': self._last_face_result.get('person_name'),
|
|
'confidence': self._last_face_result.get('confidence')
|
|
}
|
|
|
|
# Emit file_moved event
|
|
self.event_emitter.emit_file_moved(
|
|
session_id=self.current_session.get('session_id', 'unknown'),
|
|
platform=self.current_session.get('platform', 'unknown'),
|
|
account=self.current_session.get('account', 'unknown'),
|
|
filename=destination.name,
|
|
media_type=media_type,
|
|
destination_type=destination_type,
|
|
destination_path=str(destination),
|
|
thumbnail_url=thumbnail_url,
|
|
face_match=face_match
|
|
)
|
|
except Exception as emit_err:
|
|
self.log(f"Failed to emit file_moved event: {emit_err}", "debug")
|
|
|
|
return True
|
|
|
|
except Exception as e:
|
|
self.log(f"Failed to move {source.name}: {e}", "error")
|
|
self.stats['failed'] += 1
|
|
return False
|
|
|
|
def move_files_batch(self,
                     source_dir: Union[str, Path],
                     dest_dir: Union[str, Path],
                     file_timestamps: Dict[str, Optional[datetime]] = None,
                     extensions: List[str] = None,
                     preserve_if_no_timestamp: bool = True) -> Dict:
    """
    Move multiple files from source to destination with timestamp management

    Args:
        source_dir: Source directory
        dest_dir: Destination directory
        file_timestamps: Dict mapping filename to datetime (or None to preserve)
        extensions: List of file extensions to move (e.g., ['.jpg', '.mp4']).
            Matching is case-insensitive, so '.JPG' files match '.jpg'.
        preserve_if_no_timestamp: If True, preserve timestamp for files not in dict

    Returns:
        Statistics dictionary
    """
    source_dir = Path(source_dir)
    dest_dir = Path(dest_dir)
    file_timestamps = file_timestamps or {}

    # Skip if source and destination are the same
    if source_dir.resolve() == dest_dir.resolve():
        self.log("Source and destination are the same, skipping move", "info")
        return {"moved": 0, "failed": 0, "errors": []}

    if not source_dir.exists():
        self.log(f"Source directory not found: {source_dir}", "error")
        return self.stats

    # Collect candidate files in a single recursive pass.
    # (Previously this rglob'ed once per '*{ext}' pattern AND filtered by
    # suffix afterwards: overlapping patterns could produce duplicate
    # entries, and the case-sensitive glob silently missed files whose
    # extension case differed, e.g. 'photo.JPG' for extensions=['.jpg'].)
    # Sorted for a deterministic processing order.
    allowed_suffixes = {ext.lower() for ext in extensions} if extensions else None
    files_to_move = [
        f for f in sorted(source_dir.rglob('*'))
        if f.is_file() and (allowed_suffixes is None or f.suffix.lower() in allowed_suffixes)
    ]

    self.log(f"Found {len(files_to_move)} files to move", "info")
    total_files = len(files_to_move)

    for file_idx, source_file in enumerate(files_to_move):
        # Mirror the source tree structure under the destination
        relative_path = source_file.relative_to(source_dir)
        dest_file = dest_dir / relative_path

        # Per-file timestamp override (None -> preserve behaviour applies)
        timestamp = file_timestamps.get(source_file.name)

        # Update file-level progress
        self.activity_manager.update_status(
            f"Processing {source_file.name}",
            progress_current=file_idx + 1,
            progress_total=total_files
        )

        # Move the file (move_file sets this flag when face recognition ran)
        self._last_move_had_face_recognition = False
        self.move_file(
            source_file,
            dest_file,
            timestamp=timestamp,
            preserve_if_no_timestamp=preserve_if_no_timestamp
        )

        # Periodic GC after face recognition batches to free ML tensors
        if self._last_move_had_face_recognition and (file_idx + 1) % 10 == 0:
            gc.collect()

    return self.stats
|
|
|
|
def move_with_metadata(self, file_info: Dict) -> bool:
    """
    Move a single file described by a metadata dictionary

    Args:
        file_info: Dictionary containing:
            - source: Source file path
            - destination: Destination file path
            - timestamp: Optional datetime object
            - preserve_original: If True and no timestamp, preserve original

    Returns:
        True if successful
    """
    source = file_info.get('source')
    destination = file_info.get('destination')

    # Both endpoints are mandatory; bail out early when either is absent
    if not (source and destination):
        self.log("Missing source or destination in file_info", "error")
        return False

    return self.move_file(
        source,
        destination,
        file_info.get('timestamp'),
        file_info.get('preserve_original', True),
    )
|
|
|
|
def get_stats(self) -> Dict:
    """Return a shallow copy of the movement statistics."""
    return dict(self.stats)
|
|
|
|
def reset_stats(self):
    """Reset all movement counters to zero."""
    self.stats = dict.fromkeys(('moved', 'skipped', 'failed', 'duplicates'), 0)
|
|
|
|
def start_batch(self, platform: str, source: str = None, content_type: str = None, search_term: str = None):
    """
    Begin tracking a batch move operation for notifications

    Args:
        platform: Platform name (instagram, tiktok, forum, etc.)
        source: Source/username
        content_type: Type of content (post, story, reel, thread, etc.)
        search_term: Optional search term (for forum searches)
    """
    self.batch_context = dict(
        platform=platform,
        source=source,
        content_type=content_type,
        search_term=search_term,
    )
    self.moved_files = []
    # Invalidate per-batch caches so settings edited in the DB between
    # batches take effect on the next lookup
    for cache_attr in ('_face_recognition_settings_cache',
                       '_video_face_settings_cache',
                       '_review_path'):
        setattr(self, cache_attr, None)
    self.log(f"Started batch move for {platform}/{source or 'unknown'}/{content_type or 'items'}", "debug")
|
|
|
|
def end_batch(self):
    """
    End batch move operation and send notification

    Flushes background work, sends one batch notification for files moved
    to their final destination and a separate one for review-queue files,
    processes the Instagram repost queue, fires the download-complete
    callback, then clears all per-batch state.

    Returns:
        Number of files moved in this batch
    """
    # No batch in progress - nothing to do
    if not self.batch_context:
        return 0

    # Wait for background tasks (thumbnails, inventory) to complete
    # before sending notifications
    self._bg_executor.shutdown(wait=True)
    from concurrent.futures import ThreadPoolExecutor
    # Recreate the executor so later batches can keep submitting work
    self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix='move_bg')

    moved_count = len(self.moved_files)
    review_count = len(self.review_queue_files)

    platform = self.batch_context.get('platform', 'unknown')
    source = self.batch_context.get('source')
    content_type = self.batch_context.get('content_type') or 'item'  # Handle None explicitly
    search_term = self.batch_context.get('search_term')

    # Send batch notification for matched files (moved to final destination)
    if self.notifier and moved_count > 0:
        try:
            # Prepare download list for notification
            # Use individual file content types if available, otherwise fall back to batch content type
            downloads = []
            for file_info in self.moved_files:
                downloads.append({
                    'source': source,
                    'content_type': file_info.get('content_type') or content_type,
                    'filename': file_info.get('filename'),
                    'file_path': file_info.get('file_path')  # Full path for image attachment
                })

            # Send batch notification
            success = self.notifier.notify_batch_download(
                platform=platform,
                downloads=downloads,
                search_term=search_term
            )

            if success:
                self.log(f"Sent notification: {moved_count} {content_type}(s) from {source or platform}", "info")
            else:
                self.log("Failed to send notification", "warning")

        except Exception as e:
            # Notification failure must not abort batch finalization
            self.log(f"Failed to send batch notification: {e}", "error")

    # Send separate notification for review queue items (no face match)
    if self.notifier and review_count > 0:
        try:
            # Prepare review queue list for notification
            downloads = []
            for file_info in self.review_queue_files:
                downloads.append({
                    'source': source,
                    'content_type': file_info.get('content_type') or content_type,
                    'filename': file_info.get('filename'),
                    'file_path': file_info.get('file_path')  # Full path for image attachment
                })

            # Send review queue notification with lower priority
            success = self.notifier.notify_batch_download(
                platform=platform,
                downloads=downloads,
                search_term=search_term,
                is_review_queue=True  # Flag to indicate this is review queue notification
            )

            if success:
                self.log(f"Sent review queue notification: {review_count} {content_type}(s) from {source or platform}", "info")
            else:
                self.log("Review queue notification not sent (may be disabled in settings)", "debug")

        except Exception as e:
            self.log(f"Failed to send review queue notification: {e}", "error")

    # Process repost queue (download originals, find matches, replace)
    # This happens after all files are moved and notifications sent
    if platform in ['instagram', 'fastdl', 'imginn', 'toolzu', 'instaloader']:
        self.process_repost_queue()

    # Call download complete callback to trigger UI updates
    total_count = moved_count + review_count
    if total_count > 0 and self.on_download_complete:
        try:
            self.on_download_complete(platform, source, total_count)
        except Exception as e:
            self.log(f"Error in download complete callback: {e}", "error")

    # Clear batch context
    self.batch_context = None
    self.moved_files = []
    self.review_queue_files = []

    return moved_count + review_count
|
|
|
|
def _is_instagram_story(self, file_path: Path) -> bool:
|
|
"""Check if file is an Instagram story based on path"""
|
|
path_str = str(file_path).lower()
|
|
return 'story' in path_str or 'stories' in path_str
|
|
|
|
def _is_repost_detection_enabled(self) -> bool:
|
|
"""Check if repost detection is enabled in settings"""
|
|
try:
|
|
if not self.unified_db:
|
|
return False
|
|
|
|
with self.unified_db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT value FROM settings WHERE key = 'repost_detection'")
|
|
result = cursor.fetchone()
|
|
if result:
|
|
import json
|
|
settings = json.loads(result[0])
|
|
return settings.get('enabled', False) # Default: DISABLED
|
|
except (sqlite3.Error, json.JSONDecodeError, KeyError) as e:
|
|
self.log(f"Error checking repost detection setting: {e}", "debug")
|
|
return False # Default: DISABLED
|
|
|
|
def _check_repost_and_replace(self, file_path: str, source_username: str, destination_path: str = None) -> Optional[str]:
|
|
"""
|
|
Queue file for repost detection (processed after batch completes)
|
|
|
|
Args:
|
|
file_path: Current temp file path (for OCR scanning)
|
|
source_username: Username who posted this
|
|
destination_path: Final permanent storage path (used in queue)
|
|
|
|
Returns None (queueing only, no immediate replacement)
|
|
"""
|
|
# Check if feature is enabled
|
|
if not self._is_repost_detection_enabled():
|
|
return None # Feature disabled - skip
|
|
|
|
try:
|
|
from modules.instagram_repost_detector import InstagramRepostDetector
|
|
|
|
# Initialize detector for OCR check only
|
|
detector = InstagramRepostDetector(
|
|
unified_db=self.unified_db,
|
|
log_callback=lambda msg, lvl: self.log(msg, lvl)
|
|
)
|
|
|
|
# Quick OCR check to see if it's a repost (using temp file)
|
|
original_username = detector._extract_username_from_repost(file_path)
|
|
|
|
if original_username and original_username.lower() != source_username.lower():
|
|
# Queue for processing after moves complete
|
|
# IMPORTANT: Store destination_path (permanent location) NOT file_path (temp)
|
|
path_to_queue = destination_path if destination_path else file_path
|
|
|
|
self.repost_queue.append({
|
|
'file_path': path_to_queue,
|
|
'source_username': source_username,
|
|
'detected_username': original_username
|
|
})
|
|
self.log(f"Queued repost detection: {Path(path_to_queue).name} → @{original_username}", "info")
|
|
return "queued" # Signal that file was queued
|
|
|
|
return None # No repost detected, no immediate replacement
|
|
|
|
except Exception as e:
|
|
self.log(f"Repost queue check failed: {e}", "debug")
|
|
return None
|
|
|
|
def process_repost_queue(self):
    """Process all queued reposts (called after batch completes)."""
    if not self.repost_queue:
        return

    self.log(f"Processing {len(self.repost_queue)} queued reposts...", "info")

    try:
        from modules.instagram_repost_detector import InstagramRepostDetector

        detector = InstagramRepostDetector(
            unified_db=self.unified_db,
            log_callback=lambda msg, lvl: self.log(msg, lvl)
        )

        # Group queue entries by detected username so each original
        # account is only handled once
        users_to_process = {}
        for item in self.repost_queue:
            users_to_process.setdefault(item['detected_username'], []).append(item)

        processed = 0
        replaced = 0

        for username, items in users_to_process.items():
            self.log(f"Processing {len(items)} repost(s) from @{username}", "info")

            for item in items:
                try:
                    # Download the original if needed, find a match, replace
                    replacement = detector.check_and_replace_repost(
                        item['file_path'],
                        item['source_username']
                    )

                    if replacement:
                        self.log(f"✓ Replaced repost with original from @{username}", "success")
                        replaced += 1

                    processed += 1

                except Exception as e:
                    # One bad item must not stop the rest of the queue
                    self.log(f"Failed to process repost for @{username}: {e}", "error")

        self.log(f"Repost queue processed: {processed} checked, {replaced} replaced", "info")

    except Exception as e:
        self.log(f"Repost queue processing failed: {e}", "error")

    finally:
        # Always drain the queue, even after a failure
        self.repost_queue = []
|
|
|
|
def _check_perceptual_duplicate(self, file_path: str, platform: str, source: str, content_type: str = None) -> Optional[str]:
|
|
"""
|
|
Check if file is a perceptual duplicate (visually similar with overlays) for Instagram
|
|
|
|
Returns:
|
|
- None if not a duplicate or feature disabled
|
|
- "skip" if this file should be skipped (lower quality duplicate)
|
|
- file_path if this file should be kept
|
|
"""
|
|
try:
|
|
from modules.instagram_perceptual_duplicate_detector import InstagramPerceptualDuplicateDetector
|
|
|
|
# Initialize detector (uses its own universal logger now)
|
|
detector = InstagramPerceptualDuplicateDetector(
|
|
unified_db=self.unified_db
|
|
)
|
|
|
|
# Run detection
|
|
result = detector.check_and_handle_duplicate(file_path, platform, source, content_type)
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
self.log(f"Perceptual duplicate detection failed: {e}", "error")
|
|
import traceback
|
|
self.log(traceback.format_exc(), "error")
|
|
return None
|
|
|
|
def track_moved_file(self, file_path: str, metadata: Dict[str, Any] = None, content_type: str = None, is_review: bool = False):
    """
    Track a moved file for batch notification

    Args:
        file_path: Full path to the moved file (for image attachment)
        metadata: Optional metadata dictionary
        content_type: Optional specific content type for this file (overrides batch default)
        is_review: True if file was moved to review queue (no face match)
    """
    # Only track while a batch is in progress
    if not self.batch_context:
        return

    record = {
        'file_path': file_path,
        'filename': Path(file_path).name,  # Extract filename for logging
        'metadata': metadata or {},
        'content_type': content_type,  # Track individual file's content type
    }

    target = self.review_queue_files if is_review else self.moved_files
    target.append(record)
|
|
|
|
|
|
def move_files_simple(source_dir: Union[str, Path],
                      dest_dir: Union[str, Path],
                      extensions: List[str] = None,
                      file_timestamps: Dict[str, datetime] = None,
                      log_callback=None) -> Dict:
    """
    Simple function interface for moving files

    Creates a throwaway MoveManager and delegates to move_files_batch.

    Args:
        source_dir: Source directory
        dest_dir: Destination directory
        extensions: List of file extensions to move
        file_timestamps: Optional dict mapping filenames to timestamps
        log_callback: Optional logging callback

    Returns:
        Statistics dict
    """
    mover = MoveManager(log_callback=log_callback)
    return mover.move_files_batch(
        source_dir,
        dest_dir,
        file_timestamps=file_timestamps,
        extensions=extensions,
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: move two files, one with an explicit timestamp,
    # one preserving its original filesystem times
    import tempfile

    print("Testing MoveManager...")

    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        src_dir = root / "source"
        dst_dir = root / "destination"
        src_dir.mkdir()

        # Seed a couple of throwaway files
        for name, body in (("test1.jpg", "content1"), ("test2.mp4", "content2")):
            (src_dir / name).write_text(body)

        manager = MoveManager()

        # Move with specific timestamp
        manager.move_file(src_dir / "test1.jpg", dst_dir / "test1.jpg",
                          timestamp=datetime(2025, 8, 26, 17, 2, 24))
        # Move preserving original
        manager.move_file(src_dir / "test2.mp4", dst_dir / "test2.mp4", timestamp=None)

        # Verify the timestamped file landed with the expected mtime
        moved = dst_dir / "test1.jpg"
        if moved.exists():
            mtime = datetime.fromtimestamp(os.stat(moved).st_mtime)
            print(f"✅ File 1 moved with timestamp: {mtime}")

        print(f"Stats: {manager.get_stats()}")