Fix Instagram detection and notification filter bugs

- Update browser fingerprint from Edge 101 to Chrome 136 with Edge/macOS headers
- Add missing headers: X-CSRFToken, X-IG-WWW-Claim, X-ASBD-ID
- Reduce page size from 33 to 12 (matches real browser behavior)
- Add randomized delays between requests, plus a randomized 8-15s cooldown between creators
- Update X-IG-WWW-Claim dynamically from response headers
- Fix notification tagged-user filter, which referenced nonexistent columns (p.created_at, a.updated_at); it now uses a.downloaded_at

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-30 13:28:21 -04:00
parent c5781197cc
commit 7c87fc1ff4
2 changed files with 78 additions and 33 deletions

View File

@@ -2885,6 +2885,11 @@ class PaidContentScraper(LoggingMixin, DeferredDownloadsMixin):
})
return SyncResult(success=False, error=str(e))
finally:
# Inter-creator cooldown: wait 8-15s before releasing the lock so
# the next Instagram creator doesn't fire immediately. This makes
# the request pattern look like a human browsing between profiles.
import random as _random
await asyncio.sleep(_random.uniform(8.0, 15.0))
ig_rate_limiter.operation_lock.release()
async def _fetch_stories_via_fastdl(self, username: str) -> Tuple[List, Dict]:
@@ -8148,17 +8153,8 @@ class PaidContentScraper(LoggingMixin, DeferredDownloadsMixin):
placeholders = ','.join(['?'] * len(filter_users))
with self.db.unified_db.get_connection() as conn:
cursor = conn.cursor()
# Count new posts that have at least one matching tagged user
cursor.execute(f"""
SELECT COUNT(DISTINCT p.id)
FROM paid_content_posts p
JOIN paid_content_post_tagged_users tu ON tu.post_id = p.id
WHERE p.creator_id = ?
AND tu.username IN ({placeholders})
AND p.created_at >= datetime('now', '-1 hour')
""", (creator['id'], *filter_users))
filtered_new_posts = cursor.fetchone()[0]
# Count downloaded attachments from matching posts
# Count downloaded attachments from posts matching the tagged-user filter.
# Use downloaded_at as the time anchor (added_at is often NULL).
cursor.execute(f"""
SELECT COUNT(DISTINCT a.id)
FROM paid_content_attachments a
@@ -8167,9 +8163,21 @@ class PaidContentScraper(LoggingMixin, DeferredDownloadsMixin):
WHERE p.creator_id = ?
AND tu.username IN ({placeholders})
AND a.status = 'downloaded'
AND a.updated_at >= datetime('now', '-1 hour')
AND a.downloaded_at >= datetime('now', '-1 hour')
""", (creator['id'], *filter_users))
filtered_downloaded = cursor.fetchone()[0]
# Count distinct posts that had matching downloads
cursor.execute(f"""
SELECT COUNT(DISTINCT p.id)
FROM paid_content_posts p
JOIN paid_content_post_tagged_users tu ON tu.post_id = p.id
JOIN paid_content_attachments a ON a.post_id = p.id
WHERE p.creator_id = ?
AND tu.username IN ({placeholders})
AND a.status = 'downloaded'
AND a.downloaded_at >= datetime('now', '-1 hour')
""", (creator['id'], *filter_users))
filtered_new_posts = cursor.fetchone()[0]
self.log(f"Notification filter: {new_posts} posts -> {filtered_new_posts}, "
f"{downloaded} downloads -> {filtered_downloaded} (filter: {filter_users})", 'debug')
new_posts = filtered_new_posts