309
tests/test_repost_detection_manual.py
Normal file
309
tests/test_repost_detection_manual.py
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Manual Integration Test for Instagram Repost Detector
|
||||
|
||||
This script tests the repost detector with real files and can be run manually
|
||||
to validate the implementation before integrating into the main system.
|
||||
|
||||
Usage:
|
||||
python3 tests/test_repost_detection_manual.py [test_file_path] [source_username] [--live]
|
||||
|
||||
Example:
|
||||
python3 tests/test_repost_detection_manual.py \
|
||||
"/media/.../evalongoria_20251109_154548_story6.mp4" \
|
||||
"evalongoria"
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from modules.instagram_repost_detector import InstagramRepostDetector
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
|
||||
|
||||
def test_dependencies() -> bool:
    """Report whether the libraries and OCR binary used by this script are present.

    Probes, in order: pytesseract + PIL, cv2 (opencv-python), imagehash, and
    finally the tesseract binary (via ``pytesseract.get_tesseract_version()``).
    Each probe prints a ✓/✗ line, with an install hint on failure.

    The binary probe is skipped when pytesseract itself could not be imported;
    otherwise the failed import would be misreported as a missing binary.

    Returns:
        True when nothing is missing, False otherwise.
    """
    rule = "=" * 70
    print(rule)
    print("CHECKING DEPENDENCIES")
    print(rule)

    missing = []

    # Stays None when the import fails, so the binary probe below can tell
    # "pytesseract unavailable" apart from "tesseract binary unavailable".
    pytesseract = None
    try:
        import pytesseract
        from PIL import Image  # noqa: F401 - imported only to prove availability
    except ImportError:
        print("✗ pytesseract or PIL not installed")
        print(" Install: pip3 install pytesseract pillow")
        missing.append("pytesseract/PIL")
    else:
        print("✓ pytesseract and PIL installed")

    try:
        import cv2  # noqa: F401 - availability probe
    except ImportError:
        print("✗ opencv-python not installed")
        print(" Install: pip3 install opencv-python")
        missing.append("opencv-python")
    else:
        print("✓ opencv-python installed")

    try:
        import imagehash  # noqa: F401 - availability probe
    except ImportError:
        print("✗ imagehash not installed")
        print(" Install: pip3 install imagehash")
        missing.append("imagehash")
    else:
        print("✓ imagehash installed")

    # Check tesseract binary (only meaningful if the Python wrapper loaded).
    if pytesseract is None:
        print("⚠️ tesseract-ocr binary check skipped (pytesseract not installed)")
    else:
        try:
            pytesseract.get_tesseract_version()
        except Exception:
            # Deliberately broad: this is a best-effort probe and any failure
            # here means the binary cannot be used.
            print("✗ tesseract-ocr binary not installed")
            print(" Install: sudo apt-get install tesseract-ocr tesseract-ocr-eng")
            missing.append("tesseract-ocr")
        else:
            print("✓ tesseract-ocr binary installed")

    print()

    if missing:
        print(f"❌ Missing dependencies: {', '.join(missing)}")
        print("\nPlease install missing dependencies before running tests.")
        return False

    print("✅ All dependencies installed")
    return True


# Manual helper, not a pytest case: the file name and function name both match
# pytest's discovery patterns, so opt out of collection explicitly.
test_dependencies.__test__ = False
|
||||
|
||||
|
||||
def test_ocr_extraction(file_path: str):
    """Run the detector's OCR username extraction on one media file.

    Args:
        file_path: Path of the media file handed to the detector.

    Returns:
        Whatever ``_extract_username_from_repost`` returned when it is truthy,
        otherwise None.
    """
    print("\n" + "=" * 70)
    print("TEST 1: OCR USERNAME EXTRACTION")
    print("=" * 70)
    print(f"File: {file_path}")

    # NOTE: this is a default-constructed UnifiedDatabase, not a mock.
    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    username = detector._extract_username_from_repost(file_path)

    if not username:
        print("❌ FAILED: No username found")
        return None

    print(f"✅ SUCCESS: Extracted username: @{username}")
    return username


# Manual helper that needs a CLI-supplied path; keep pytest from collecting it
# (it would otherwise fail with "fixture 'file_path' not found").
test_ocr_extraction.__test__ = False
|
||||
|
||||
|
||||
def test_monitored_check(username: str):
    """Report whether the detector considers *username* a monitored account.

    Args:
        username: Account name (without the leading ``@``) to look up.

    Returns:
        The value returned by the detector's ``_is_monitored_account``.
    """
    print("\n" + "=" * 70)
    print("TEST 2: MONITORED ACCOUNT CHECK")
    print("=" * 70)
    print(f"Username: @{username}")

    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    is_monitored = detector._is_monitored_account(username)

    if is_monitored:
        print(f"✅ @{username} IS monitored (will use normal download path)")
    else:
        print(f"ℹ️ @{username} NOT monitored (will use temp queue)")

    return is_monitored


# Manual helper that needs a CLI-derived username; keep pytest from collecting
# it (it would otherwise fail with "fixture 'username' not found").
test_monitored_check.__test__ = False
|
||||
|
||||
|
||||
def test_perceptual_hash(file_path: str):
    """Compute the detector's perceptual hash for one media file and print it.

    Args:
        file_path: Path of the media file handed to the detector.

    Returns:
        Whatever ``_get_perceptual_hash`` returned when it is truthy,
        otherwise None.
    """
    print("\n" + "=" * 70)
    print("TEST 3: PERCEPTUAL HASH CALCULATION")
    print("=" * 70)
    print(f"File: {file_path}")

    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    hash_value = detector._get_perceptual_hash(file_path)

    if not hash_value:
        print("❌ FAILED: Could not calculate hash")
        return None

    print(f"✅ SUCCESS: Hash = {hash_value}")
    return hash_value


# Manual helper that needs a CLI-supplied path; keep pytest from collecting it
# (it would otherwise fail with "fixture 'file_path' not found").
test_perceptual_hash.__test__ = False
|
||||
|
||||
|
||||
def test_full_detection(file_path: str, source_username: str, dry_run: bool = True):
    """Run the complete repost detection and replacement workflow on one file.

    Args:
        file_path: Path of the suspected repost.
        source_username: Account the file was downloaded from.
        dry_run: When True (the default) only the banner and a notice are
            printed; no database or detector is created and nothing is
            downloaded.

    Returns:
        The replacement returned by ``check_and_replace_repost`` when it is
        truthy; None in dry-run mode or when no replacement was produced.
    """
    mode = "DRY RUN (no downloads)" if dry_run else "LIVE (will download)"

    print("\n" + "=" * 70)
    print("TEST 4: FULL REPOST DETECTION WORKFLOW")
    print("=" * 70)
    print(f"File: {file_path}")
    print(f"Source: @{source_username}")
    print(f"Mode: {mode}")

    if dry_run:
        # Bail out before touching the database or the network.
        print("\n⚠️ DRY RUN MODE - Will not download content from ImgInn")
        print("To test with actual downloads, run with --live flag")
        return None

    db = UnifiedDatabase()
    detector = InstagramRepostDetector(unified_db=db)

    print("\nStarting detection...")
    replacement = detector.check_and_replace_repost(file_path, source_username)

    if not replacement:
        print("\n❌ FAILED: No replacement found")
        print("Possible reasons:")
        print(" - No @username detected in the file")
        print(" - Original content not available")
        print(" - No matching content found via perceptual hash")
        return None

    print("\n✅ SUCCESS: Repost replaced!")
    print(f"Original file: {file_path}")
    print(f"Replacement file: {replacement}")
    return replacement


# Manual helper that needs CLI-supplied arguments; keep pytest from collecting
# it (it would otherwise fail with "fixture 'file_path' not found").
test_full_detection.__test__ = False
|
||||
|
||||
|
||||
def _table_exists(cursor, table_name: str) -> bool:
    """Return True when sqlite_master lists a table called *table_name*."""
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (table_name,),
    )
    return cursor.fetchone() is not None


def test_database_tracking():
    """Print the state of the repost tracking tables.

    For ``repost_fetch_cache`` and ``repost_replacements`` this reports whether
    the table exists and, if so, its row count.  When replacements have been
    recorded, the five most recent (by ``detected_at``) are listed as well.
    All queries are read-only and share a single connection.
    """
    print("\n" + "=" * 70)
    print("TEST 5: DATABASE TRACKING")
    print("=" * 70)

    db = UnifiedDatabase()

    with db.get_connection() as conn:
        cursor = conn.cursor()

        # Fetch cache
        if _table_exists(cursor, "repost_fetch_cache"):
            print("✓ repost_fetch_cache table exists")
            cursor.execute("SELECT COUNT(*) FROM repost_fetch_cache")
            count = cursor.fetchone()[0]
            print(f" {count} usernames in cache")
        else:
            print("ℹ️ repost_fetch_cache table will be created on first use")

        # Replacements
        if _table_exists(cursor, "repost_replacements"):
            print("✓ repost_replacements table exists")
            cursor.execute("SELECT COUNT(*) FROM repost_replacements")
            count = cursor.fetchone()[0]
            print(f" {count} replacements tracked")

            if count > 0:
                print("\nRecent replacements:")
                cursor.execute("""
                    SELECT repost_source, original_username, repost_filename, detected_at
                    FROM repost_replacements
                    ORDER BY detected_at DESC
                    LIMIT 5
                """)
                for row in cursor.fetchall():
                    print(f" - @{row[0]} reposted from @{row[1]}: {row[2]} ({row[3]})")
        else:
            print("ℹ️ repost_replacements table will be created on first use")


# Manual helper that queries a real UnifiedDatabase; keep pytest from
# collecting and running it during an ordinary test run.
test_database_tracking.__test__ = False
|
||||
|
||||
|
||||
def _print_usage():
    """Print the command-line synopsis and two example invocations."""
    print("\nUsage:")
    print(" python3 tests/test_repost_detection_manual.py [file_path] [source_username] [--live]")
    print("\nExamples:")
    print(" # Test with real example file (dry run)")
    print(' python3 tests/test_repost_detection_manual.py \\')
    print(' "/media/.../evalongoria_20251109_154548_story6.mp4" \\')
    print(' "evalongoria"')
    print()
    print(" # Test with actual downloads")
    print(' python3 tests/test_repost_detection_manual.py \\')
    print(' "/media/.../evalongoria_20251109_154548_story6.mp4" \\')
    print(' "evalongoria" \\')
    print(' --live')
    print()


def main():
    """Command-line entry point for the manual repost-detector checks.

    Arguments are read from ``sys.argv``:

    * first positional  - path of the media file to inspect
    * second positional - source username (defaults to ``"unknown"``)
    * ``--live``        - may appear anywhere; enables the full workflow,
      which may download content

    With no file path, the usage text is printed and only the dependency and
    database checks run.  ``--live`` is never taken for a positional value, so
    ``script file.mp4 --live`` keeps the default username.
    """
    print("\n" + "=" * 70)
    print("INSTAGRAM REPOST DETECTOR - MANUAL TEST SUITE")
    print("=" * 70)

    # Separate the flag from positionals so "--live" is never mistaken for the
    # file path or the source username.
    cli_args = sys.argv[1:]
    dry_run = "--live" not in cli_args
    positional = [arg for arg in cli_args if arg != "--live"]

    # No test file provided: show usage, then run the checks that need no file.
    if not positional:
        _print_usage()

        # Run dependency check and database check only
        if test_dependencies():
            test_database_tracking()
        return

    file_path = positional[0]
    source_username = positional[1] if len(positional) >= 2 else "unknown"

    # Validate file exists
    if not os.path.exists(file_path):
        print(f"\n❌ ERROR: File not found: {file_path}")
        return

    # Prerequisite: dependencies
    if not test_dependencies():
        print("\n⚠️ Cannot proceed with tests - missing dependencies")
        return

    # Test 1: OCR extraction
    username = test_ocr_extraction(file_path)

    # Test 2: Monitored check (only if a username was found)
    if username:
        test_monitored_check(username)

    # Test 3: Perceptual hash
    test_perceptual_hash(file_path)

    # Test 5: Database tracking (runs before Test 4, as in the original order)
    test_database_tracking()

    # Test 4: Full detection (live mode only)
    if dry_run:
        print("\n" + "=" * 70)
        print("SKIPPING FULL WORKFLOW TEST (DRY RUN)")
        print("=" * 70)
        print("To test full workflow with actual downloads, add --live flag")
    else:
        test_full_detection(file_path, source_username, dry_run=False)

    print("\n" + "=" * 70)
    print("TEST SUITE COMPLETE")
    print("=" * 70)
|
||||
|
||||
|
||||
# Run the manual suite only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user