Fix Instagram detection and notification filter bugs
- Update browser fingerprint from Edge 101 to Chrome 136 with Edge/macOS headers
- Add missing headers: X-CSRFToken, X-IG-WWW-Claim, X-ASBD-ID
- Reduce page size from 33 to 12 (matches real browser behavior)
- Add randomized delays between requests and between creators (8-15s cooldown)
- Update X-IG-WWW-Claim dynamically from response headers
- Fix notification tagged-user filter, which used wrong column names (p.created_at and a.updated_at don't exist); it now uses a.downloaded_at

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -548,27 +548,49 @@ class InstagramAdapter(LoggingMixin):
|
||||
self.log(f"Authenticated API: fetching feed for @{username}{' (full backfill)' if paginate_all else ''}...", 'info')
|
||||
|
||||
try:
|
||||
import random as _random
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
session = CurlSession(impersonate='edge101')
|
||||
session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
|
||||
'X-IG-App-ID': '936619743392459',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': 'https://www.instagram.com/',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Sec-CH-UA': '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
||||
'Sec-CH-UA-Mobile': '?0',
|
||||
'Sec-CH-UA-Platform': '"Windows"',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-origin',
|
||||
})
|
||||
session = CurlSession(impersonate='chrome136')
|
||||
|
||||
# Load cookies first so we can extract csrftoken for the header
|
||||
csrf_token = ''
|
||||
for c in cookie_list:
|
||||
name = c.get('name', '')
|
||||
value = c.get('value', '')
|
||||
domain = c.get('domain', '.instagram.com')
|
||||
if name and value:
|
||||
session.cookies.set(name, value, domain=domain)
|
||||
if name == 'csrftoken':
|
||||
csrf_token = value
|
||||
|
||||
# Extract ig_www_claim from cookies if present (set by IG in responses)
|
||||
ig_claim = '0'
|
||||
for c in cookie_list:
|
||||
if c.get('name') == 'ig_www_claim':
|
||||
ig_claim = c.get('value', '0')
|
||||
break
|
||||
|
||||
# Override UA/platform headers to match Edge on macOS (where
|
||||
# the cookies were created). The chrome136 TLS fingerprint is
|
||||
# identical to Edge 136 since both use Chromium's BoringSSL.
|
||||
session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',
|
||||
'Sec-CH-UA': '"Microsoft Edge";v="136", "Chromium";v="136", "Not_A Brand";v="24"',
|
||||
'Sec-CH-UA-Mobile': '?0',
|
||||
'Sec-CH-UA-Platform': '"macOS"',
|
||||
'X-CSRFToken': csrf_token,
|
||||
'X-IG-App-ID': '936619743392459',
|
||||
'X-IG-WWW-Claim': ig_claim,
|
||||
'X-ASBD-ID': '129477',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': f'https://www.instagram.com/{username}/',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-origin',
|
||||
})
|
||||
|
||||
def _save_cookies():
|
||||
try:
|
||||
@@ -590,13 +612,22 @@ class InstagramAdapter(LoggingMixin):
|
||||
except Exception as e:
|
||||
self.log(f"Authenticated API: failed to save cookies: {e}", 'debug')
|
||||
|
||||
def _update_claim_header(resp):
    """Refresh the session's X-IG-WWW-Claim header (IG rotates this).

    Reads ``x-ig-set-www-claim`` from the response headers and, when a
    non-empty value is present, stores it on the enclosing ``session`` so
    later requests send it as ``X-IG-WWW-Claim``. Responses without that
    header leave the current value untouched.
    """
    rotated_claim = resp.headers.get('x-ig-set-www-claim')
    if not rotated_claim:
        # Nothing new from the server; keep whatever claim is already set.
        return
    session.headers['X-IG-WWW-Claim'] = rotated_claim
|
||||
|
||||
# ── Single-page mode (normal sync) ──
|
||||
if not paginate_all:
|
||||
# Randomized pre-request delay to avoid machine-like timing
|
||||
_time.sleep(_random.uniform(1.0, 3.0))
|
||||
resp = session.get(
|
||||
f'https://www.instagram.com/api/v1/feed/user/{user_id}/',
|
||||
params={'count': 33},
|
||||
params={'count': 12},
|
||||
timeout=15
|
||||
)
|
||||
_update_claim_header(resp)
|
||||
_save_cookies()
|
||||
|
||||
if resp.status_code == 401:
|
||||
@@ -656,23 +687,30 @@ class InstagramAdapter(LoggingMixin):
|
||||
|
||||
while True:
|
||||
page += 1
|
||||
params = {'count': 33}
|
||||
params = {'count': 12}
|
||||
if max_id:
|
||||
params['max_id'] = max_id
|
||||
|
||||
# Human-like delay between pages (randomized)
|
||||
if page > 1:
|
||||
_time.sleep(_random.uniform(2.0, 5.0))
|
||||
else:
|
||||
_time.sleep(_random.uniform(1.0, 3.0))
|
||||
|
||||
try:
|
||||
resp = session.get(
|
||||
f'https://www.instagram.com/api/v1/feed/user/{user_id}/',
|
||||
params=params,
|
||||
timeout=15
|
||||
)
|
||||
_update_claim_header(resp)
|
||||
except Exception as e:
|
||||
self.log(f"Backfill page {page}: request error: {e}", 'warning')
|
||||
consecutive_errors += 1
|
||||
if consecutive_errors >= 3:
|
||||
self.log("Backfill: too many consecutive errors, stopping.", 'warning')
|
||||
break
|
||||
_time.sleep(5)
|
||||
_time.sleep(_random.uniform(5.0, 10.0))
|
||||
continue
|
||||
|
||||
if resp.status_code == 401:
|
||||
@@ -682,8 +720,8 @@ class InstagramAdapter(LoggingMixin):
|
||||
break
|
||||
|
||||
if resp.status_code == 429:
|
||||
self.log("Backfill: rate limited, waiting 60s...", 'warning')
|
||||
_time.sleep(60)
|
||||
self.log("Backfill: rate limited, waiting 60-120s...", 'warning')
|
||||
_time.sleep(_random.uniform(60, 120))
|
||||
continue
|
||||
|
||||
if resp.status_code != 200:
|
||||
@@ -692,7 +730,7 @@ class InstagramAdapter(LoggingMixin):
|
||||
if consecutive_errors >= 3:
|
||||
self.log("Backfill: too many consecutive errors, stopping.", 'warning')
|
||||
break
|
||||
_time.sleep(5)
|
||||
_time.sleep(_random.uniform(5.0, 10.0))
|
||||
continue
|
||||
|
||||
consecutive_errors = 0
|
||||
@@ -743,7 +781,6 @@ class InstagramAdapter(LoggingMixin):
|
||||
break
|
||||
|
||||
max_id = next_max_id
|
||||
_time.sleep(2)
|
||||
|
||||
_save_cookies()
|
||||
self.log(f"Backfill complete: {total_fetched} fetched, {total_new} new posts for @{username}", 'info')
|
||||
|
||||
@@ -2885,6 +2885,11 @@ class PaidContentScraper(LoggingMixin, DeferredDownloadsMixin):
|
||||
})
|
||||
return SyncResult(success=False, error=str(e))
|
||||
finally:
|
||||
# Inter-creator cooldown: wait 8-15s before releasing the lock so
|
||||
# the next Instagram creator doesn't fire immediately. This makes
|
||||
# the request pattern look like a human browsing between profiles.
|
||||
import random as _random
|
||||
await asyncio.sleep(_random.uniform(8.0, 15.0))
|
||||
ig_rate_limiter.operation_lock.release()
|
||||
|
||||
async def _fetch_stories_via_fastdl(self, username: str) -> Tuple[List, Dict]:
|
||||
@@ -8148,17 +8153,8 @@ class PaidContentScraper(LoggingMixin, DeferredDownloadsMixin):
|
||||
placeholders = ','.join(['?'] * len(filter_users))
|
||||
with self.db.unified_db.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
# Count new posts that have at least one matching tagged user
|
||||
cursor.execute(f"""
|
||||
SELECT COUNT(DISTINCT p.id)
|
||||
FROM paid_content_posts p
|
||||
JOIN paid_content_post_tagged_users tu ON tu.post_id = p.id
|
||||
WHERE p.creator_id = ?
|
||||
AND tu.username IN ({placeholders})
|
||||
AND p.created_at >= datetime('now', '-1 hour')
|
||||
""", (creator['id'], *filter_users))
|
||||
filtered_new_posts = cursor.fetchone()[0]
|
||||
# Count downloaded attachments from matching posts
|
||||
# Count downloaded attachments from posts matching the tagged-user filter.
|
||||
# Use downloaded_at as the time anchor (added_at is often NULL).
|
||||
cursor.execute(f"""
|
||||
SELECT COUNT(DISTINCT a.id)
|
||||
FROM paid_content_attachments a
|
||||
@@ -8167,9 +8163,21 @@ class PaidContentScraper(LoggingMixin, DeferredDownloadsMixin):
|
||||
WHERE p.creator_id = ?
|
||||
AND tu.username IN ({placeholders})
|
||||
AND a.status = 'downloaded'
|
||||
AND a.updated_at >= datetime('now', '-1 hour')
|
||||
AND a.downloaded_at >= datetime('now', '-1 hour')
|
||||
""", (creator['id'], *filter_users))
|
||||
filtered_downloaded = cursor.fetchone()[0]
|
||||
# Count distinct posts that had matching downloads
|
||||
cursor.execute(f"""
|
||||
SELECT COUNT(DISTINCT p.id)
|
||||
FROM paid_content_posts p
|
||||
JOIN paid_content_post_tagged_users tu ON tu.post_id = p.id
|
||||
JOIN paid_content_attachments a ON a.post_id = p.id
|
||||
WHERE p.creator_id = ?
|
||||
AND tu.username IN ({placeholders})
|
||||
AND a.status = 'downloaded'
|
||||
AND a.downloaded_at >= datetime('now', '-1 hour')
|
||||
""", (creator['id'], *filter_users))
|
||||
filtered_new_posts = cursor.fetchone()[0]
|
||||
self.log(f"Notification filter: {new_posts} posts -> {filtered_new_posts}, "
|
||||
f"{downloaded} downloads -> {filtered_downloaded} (filter: {filter_users})", 'debug')
|
||||
new_posts = filtered_new_posts
|
||||
|
||||
Reference in New Issue
Block a user