#!/usr/bin/env python3
"""
Manual Integration Test for Instagram Repost Detector

This script tests the repost detector with real files and can be run manually
to validate the implementation before integrating into the main system.

Usage:
    python3 tests/test_repost_detection_manual.py [test_file_path] [source_username] [--live]

Without --live the full detection workflow is skipped (dry run).

Example:
    python3 tests/test_repost_detection_manual.py \
        "/media/.../evalongoria_20251109_154548_story6.mp4" \
        "evalongoria"
"""

import sys
import os
from pathlib import Path

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from modules.instagram_repost_detector import InstagramRepostDetector
from modules.unified_database import UnifiedDatabase


def test_dependencies() -> bool:
    """Report which optional dependencies can be imported.

    Probes pytesseract/PIL, cv2 and imagehash by importing them, then asks
    pytesseract for the tesseract version to check the binary.

    Returns:
        True when nothing was reported missing, False otherwise.
    """
    print("=" * 70)
    print("CHECKING DEPENDENCIES")
    print("=" * 70)

    missing = []

    # The imports below are availability probes only; the names are unused.
    try:
        import pytesseract  # noqa: F401
        from PIL import Image  # noqa: F401
        print("✓ pytesseract and PIL installed")
    except ImportError:
        print("✗ pytesseract or PIL not installed")
        print(" Install: pip3 install pytesseract pillow")
        missing.append("pytesseract/PIL")

    try:
        import cv2  # noqa: F401
        print("✓ opencv-python installed")
    except ImportError:
        print("✗ opencv-python not installed")
        print(" Install: pip3 install opencv-python")
        missing.append("opencv-python")

    try:
        import imagehash  # noqa: F401
        print("✓ imagehash installed")
    except ImportError:
        print("✗ imagehash not installed")
        print(" Install: pip3 install imagehash")
        missing.append("imagehash")

    # Check tesseract binary. Any failure here (including pytesseract itself
    # being absent) is reported as a missing binary.
    try:
        import pytesseract
        pytesseract.get_tesseract_version()
        print("✓ tesseract-ocr binary installed")
    except Exception:
        print("✗ tesseract-ocr binary not installed")
        print(" Install: sudo apt-get install tesseract-ocr tesseract-ocr-eng")
        missing.append("tesseract-ocr")

    print()

    if missing:
        print(f"❌ Missing dependencies: {', '.join(missing)}")
        print("\nPlease install missing dependencies before running tests.")
        return False

    print("✅ All dependencies installed")
    return True


def test_ocr_extraction(file_path: str):
    """Run the detector's username extraction on a file and print the outcome.

    Args:
        file_path: Path of the media file handed to the detector.

    Returns:
        The value returned by the detector when it is truthy, otherwise None.
    """
    print("\n" + "=" * 70)
    print("TEST 1: OCR USERNAME EXTRACTION")
    print("=" * 70)
    print(f"File: {file_path}")

    # Default-constructed database handle passed to the detector.
    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    username = detector._extract_username_from_repost(file_path)

    if username:
        print(f"✅ SUCCESS: Extracted username: @{username}")
        return username

    print("❌ FAILED: No username found")
    return None


def test_monitored_check(username: str):
    """Ask the detector whether a username is a monitored account.

    Args:
        username: Account name (without the leading "@") to look up.

    Returns:
        Whatever the detector's monitored-account check returned.
    """
    print("\n" + "=" * 70)
    print("TEST 2: MONITORED ACCOUNT CHECK")
    print("=" * 70)
    print(f"Username: @{username}")

    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    is_monitored = detector._is_monitored_account(username)

    if is_monitored:
        print(f"✅ @{username} IS monitored (will use normal download path)")
    else:
        print(f"ℹ️ @{username} NOT monitored (will use temp queue)")

    return is_monitored


def test_perceptual_hash(file_path: str):
    """Compute the detector's perceptual hash for a file and print it.

    Args:
        file_path: Path of the media file to hash.

    Returns:
        The hash returned by the detector when it is truthy, otherwise None.
    """
    print("\n" + "=" * 70)
    print("TEST 3: PERCEPTUAL HASH CALCULATION")
    print("=" * 70)
    print(f"File: {file_path}")

    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    hash_value = detector._get_perceptual_hash(file_path)

    if hash_value:
        print(f"✅ SUCCESS: Hash = {hash_value}")
        return hash_value

    print("❌ FAILED: Could not calculate hash")
    return None


def test_full_detection(file_path: str, source_username: str, dry_run: bool = True):
    """Run the detector's full check-and-replace workflow.

    Args:
        file_path: Path of the suspected repost file.
        source_username: Account the file was downloaded from.
        dry_run: When True (the default) only a notice is printed and the
            detector is never invoked.

    Returns:
        The replacement reported by the detector when it is truthy; None in
        dry-run mode or when no replacement was found.
    """
    print("\n" + "=" * 70)
    print("TEST 4: FULL REPOST DETECTION WORKFLOW")
    print("=" * 70)
    print(f"File: {file_path}")
    print(f"Source: @{source_username}")
    print(f"Mode: {'DRY RUN (no downloads)' if dry_run else 'LIVE (will download)'}")

    if dry_run:
        print("\n⚠️ DRY RUN MODE - Will not download content from ImgInn")
        print("To test with actual downloads, run with --live flag")
        return None

    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    print("\nStarting detection...")
    replacement = detector.check_and_replace_repost(file_path, source_username)

    if replacement:
        print("\n✅ SUCCESS: Repost replaced!")
        print(f"Original file: {file_path}")
        print(f"Replacement file: {replacement}")
        return replacement

    print("\n❌ FAILED: No replacement found")
    print("Possible reasons:")
    print(" - No @username detected in the file")
    print(" - Original content not available")
    print(" - No matching content found via perceptual hash")
    return None


def test_database_tracking() -> None:
    """Print the state of the repost tracking tables.

    Looks up `repost_fetch_cache` and `repost_replacements` in
    `sqlite_master`; for each table that exists its row count is printed, and
    for `repost_replacements` up to five most recent rows are listed.
    """
    print("\n" + "=" * 70)
    print("TEST 5: DATABASE TRACKING")
    print("=" * 70)

    db = UnifiedDatabase()

    # Determine which tracking tables exist before querying them.
    with db.get_connection() as conn:
        cursor = conn.cursor()

        # Check fetch cache
        cursor.execute("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name='repost_fetch_cache'
        """)
        has_cache = cursor.fetchone() is not None

        # Check replacements table
        cursor.execute("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name='repost_replacements'
        """)
        has_replacements = cursor.fetchone() is not None

    if has_cache:
        print("✓ repost_fetch_cache table exists")
        with db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM repost_fetch_cache")
            count = cursor.fetchone()[0]
            print(f" {count} usernames in cache")
    else:
        print("ℹ️ repost_fetch_cache table will be created on first use")

    if has_replacements:
        print("✓ repost_replacements table exists")
        with db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM repost_replacements")
            count = cursor.fetchone()[0]
            print(f" {count} replacements tracked")

            if count > 0:
                print("\nRecent replacements:")
                cursor.execute("""
                    SELECT repost_source, original_username, repost_filename, detected_at
                    FROM repost_replacements
                    ORDER BY detected_at DESC
                    LIMIT 5
                """)
                for row in cursor.fetchall():
                    print(f" - @{row[0]} reposted from @{row[1]}: {row[2]} ({row[3]})")
    else:
        print("ℹ️ repost_replacements table will be created on first use")


def main() -> int:
    """Parse the command line and run the manual test suite.

    Positional arguments are `file_path` and an optional `source_username`
    (default "unknown"). The `--live` flag may appear anywhere and enables the
    full detection workflow. With no positional argument, usage help is
    printed and only the dependency and database checks run.

    Returns:
        Process exit status: 0 on a completed run, 1 when the file is missing
        or dependencies are not installed.
    """
    print("\n" + "=" * 70)
    print("INSTAGRAM REPOST DETECTOR - MANUAL TEST SUITE")
    print("=" * 70)

    live_flag = "--live"
    cli_args = sys.argv[1:]
    dry_run = live_flag not in cli_args
    # Strip the flag before reading positionals so that
    # `script FILE --live` does not treat "--live" as the source username.
    positional = [arg for arg in cli_args if arg != live_flag]

    # Check if test file provided
    if not positional:
        print("\nUsage:")
        print(" python3 tests/test_repost_detection_manual.py [file_path] [source_username] [--live]")
        print("\nExamples:")
        print(" # Test with real example file (dry run)")
        print(' python3 tests/test_repost_detection_manual.py \\')
        print(' "/media/.../evalongoria_20251109_154548_story6.mp4" \\')
        print(' "evalongoria"')
        print()
        print(" # Test with actual downloads")
        print(' python3 tests/test_repost_detection_manual.py \\')
        print(' "/media/.../evalongoria_20251109_154548_story6.mp4" \\')
        print(' "evalongoria" \\')
        print(' --live')
        print()

        # Run dependency check and database check only
        deps_ok = test_dependencies()
        if deps_ok:
            test_database_tracking()
        return 0 if deps_ok else 1

    file_path = positional[0]
    source_username = positional[1] if len(positional) > 1 else "unknown"

    # Validate the path is an existing regular file (a directory is rejected).
    if not os.path.isfile(file_path):
        print(f"\n❌ ERROR: File not found: {file_path}")
        return 1

    # Dependency check gates everything else
    deps_ok = test_dependencies()
    if not deps_ok:
        print("\n⚠️ Cannot proceed with tests - missing dependencies")
        return 1

    # Test 1: OCR extraction
    username = test_ocr_extraction(file_path)

    # Test 2: Monitored check (if username found)
    if username:
        test_monitored_check(username)

    # Test 3: Perceptual hash
    test_perceptual_hash(file_path)

    # Test 5: Database tracking
    test_database_tracking()

    # Test 4: Full detection (if not dry run)
    if not dry_run:
        test_full_detection(file_path, source_username, dry_run=False)
    else:
        print("\n" + "=" * 70)
        print("SKIPPING FULL WORKFLOW TEST (DRY RUN)")
        print("=" * 70)
        print("To test full workflow with actual downloads, add --live flag")

    print("\n" + "=" * 70)
    print("TEST SUITE COMPLETE")
    print("=" * 70)
    return 0


if __name__ == "__main__":
    sys.exit(main())