637
docs/archive/WEB_GUI_LIVE_SCREENSHOTS.md
Normal file
637
docs/archive/WEB_GUI_LIVE_SCREENSHOTS.md
Normal file
@@ -0,0 +1,637 @@
|
||||
# Live Screenshot Streaming Feature
|
||||
|
||||
## Overview
|
||||
Stream live browser screenshots from Playwright scrapers to the web UI in real time, providing visual insight into scraping progress.
|
||||
|
||||
---
|
||||
|
||||
## Technical Implementation
|
||||
|
||||
### 1. Backend - Screenshot Capture
|
||||
|
||||
**Modify Download Workers:**
|
||||
```python
|
||||
# backend/workers/download_worker.py
|
||||
from backend.core.websocket_manager import broadcast_screenshot
|
||||
import base64
|
||||
import asyncio
|
||||
|
||||
@celery_app.task(bind=True)
def download_instagram_posts(self, queue_item_id: int, config: dict):
    """Background task with live screenshot streaming.

    Builds a screenshot callback bound to ``queue_item_id`` and hands it to
    ``FastDLDownloader`` so the scraper can push JPEG frames to WebSocket
    subscribers while it works.

    Args:
        queue_item_id: ID of the queue item being processed; included in
            every broadcast message as ``queue_id``.
        config: Task configuration (not used by the part shown here).

    NOTE(review): Celery tasks usually run in a separate worker process from
    the web server. Confirm that ``broadcast_screenshot`` can actually reach
    the process that owns the WebSocket connections (e.g. through a message
    broker) -- this excerpt does not show how that is bridged.
    """
    # Imported locally because the module-level imports shown for this file
    # do not include ``datetime``; without it the callback below would raise
    # NameError, which the broad ``except`` would silently swallow.
    from datetime import datetime

    # Create screenshot callback
    async def screenshot_callback(page, action: str):
        """Called periodically during scraping.

        Captures the visible page as a JPEG, base64-encodes it and broadcasts
        it together with a human-readable ``action`` label.
        """
        try:
            # Take screenshot
            screenshot_bytes = await page.screenshot(type='jpeg', quality=60)

            # Encode to base64 so the frame can travel inside a JSON message
            screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')

            # Broadcast via WebSocket
            await broadcast_screenshot({
                'type': 'scraper_screenshot',
                'queue_id': queue_item_id,
                'platform': 'instagram',
                'action': action,
                'screenshot': screenshot_b64,
                'timestamp': datetime.now().isoformat()
            })
        except Exception as e:
            # Streaming is best-effort: a failed frame must never abort the
            # download itself, so the error is only logged.
            logger.debug(f"Screenshot capture error: {e}")

    # Initialize downloader with screenshot callback
    downloader = FastDLDownloader(
        unified_db=get_unified_db(),
        log_callback=log_callback,
        screenshot_callback=screenshot_callback  # New parameter
    )

    # Rest of download logic...
|
||||
```
|
||||
|
||||
**Update Downloader Modules:**
|
||||
```python
|
||||
# modules/fastdl_module.py
|
||||
class FastDLDownloader:
|
||||
def __init__(self, ..., screenshot_callback=None):
|
||||
self.screenshot_callback = screenshot_callback
|
||||
|
||||
async def _run_download(self):
|
||||
"""Download with screenshot streaming"""
|
||||
with sync_playwright() as p:
|
||||
browser = p.firefox.launch(headless=self.headless)
|
||||
page = browser.new_page()
|
||||
|
||||
# Take screenshot at key points
|
||||
await self._capture_screenshot(page, "Navigating to Instagram")
|
||||
|
||||
page.goto("https://fastdl.app/en/instagram-download")
|
||||
|
||||
await self._capture_screenshot(page, "Filling username field")
|
||||
|
||||
input_box.fill(self.username)
|
||||
|
||||
await self._capture_screenshot(page, "Waiting for results")
|
||||
|
||||
# During scroll and download
|
||||
for i, card in enumerate(download_cards):
|
||||
if i % 3 == 0: # Screenshot every 3 items
|
||||
await self._capture_screenshot(
|
||||
page,
|
||||
f"Downloading item {i+1}/{len(download_cards)}"
|
||||
)
|
||||
|
||||
# Download logic...
|
||||
|
||||
async def _capture_screenshot(self, page, action: str):
|
||||
"""Capture and stream screenshot"""
|
||||
if self.screenshot_callback:
|
||||
try:
|
||||
await self.screenshot_callback(page, action)
|
||||
except Exception as e:
|
||||
logger.debug(f"Screenshot callback error: {e}")
|
||||
```
|
||||
|
||||
### 2. WebSocket Manager Enhancement
|
||||
|
||||
**Add Screenshot Broadcasting:**
|
||||
```python
|
||||
# backend/core/websocket_manager.py
|
||||
class ConnectionManager:
    """Tracks WebSocket connections and per-queue screenshot subscribers.

    Only the screenshot-related additions are shown in this excerpt.
    """

    def __init__(self):
        self.active_connections: List[WebSocket] = []
        # queue_id -> sockets that asked for that queue item's screenshots
        self.screenshot_subscribers: Dict[int, List[WebSocket]] = {}

    async def subscribe_screenshots(self, websocket: "WebSocket", queue_id: int):
        """Subscribe to screenshots for specific queue item.

        Subscribing the same socket twice is a no-op, so a client never
        receives duplicate frames.
        """
        subscribers = self.screenshot_subscribers.setdefault(queue_id, [])
        if websocket not in subscribers:
            subscribers.append(websocket)

    async def unsubscribe_screenshots(self, websocket: "WebSocket", queue_id: int):
        """Unsubscribe from screenshots; safe to call if not subscribed."""
        self._drop_subscriber(websocket, queue_id)

    async def broadcast_screenshot(self, message: dict):
        """Broadcast screenshot to subscribed clients only.

        The target queue is read from ``message['queue_id']``; messages
        without one are ignored. Sockets whose send fails are dropped from
        the subscriber list.
        """
        queue_id = message.get('queue_id')
        # Compare against None explicitly: 0 is a legitimate queue id.
        if queue_id is None:
            return

        # Iterate over a snapshot: other coroutines may subscribe or
        # unsubscribe while we are suspended in ``send_json``.
        for connection in list(self.screenshot_subscribers.get(queue_id, ())):
            try:
                await connection.send_json(message)
            except Exception:
                # ``Exception`` (not a bare except) lets task cancellation
                # propagate while still cleaning up dead sockets.
                self._drop_subscriber(connection, queue_id)

    def _drop_subscriber(self, websocket, queue_id):
        """Remove one subscriber and prune the queue entry once it is empty."""
        subscribers = self.screenshot_subscribers.get(queue_id)
        if subscribers is None:
            return
        if websocket in subscribers:
            subscribers.remove(websocket)
        if not subscribers:
            del self.screenshot_subscribers[queue_id]


# Global function
async def broadcast_screenshot(message: dict):
    """Module-level convenience wrapper around ``manager.broadcast_screenshot``."""
    await manager.broadcast_screenshot(message)
|
||||
```
|
||||
|
||||
### 3. API Endpoint for Screenshot Control
|
||||
|
||||
**Add Screenshot Subscription:**
|
||||
```python
|
||||
# backend/api/routes/websocket.py
|
||||
@router.websocket("/ws/screenshots/{queue_id}")
async def websocket_screenshots(
    websocket: WebSocket,
    queue_id: int,
    user_id: int = Depends(get_current_user_ws)
):
    """WebSocket endpoint for live screenshot streaming.

    Registers the socket with the connection manager, subscribes it to the
    given queue item's screenshots, then services control messages until the
    client sends "stop" or the connection ends.

    Control messages (plain text):
        "ping" -> replies "pong" (keep-alive)
        "stop" -> ends the subscription

    NOTE(review): nothing here checks that ``user_id`` is allowed to watch
    ``queue_id`` -- confirm whether an ownership check is needed.
    """
    await manager.connect(websocket, user_id)
    await manager.subscribe_screenshots(websocket, queue_id)

    try:
        while True:
            # Keep connection alive
            data = await websocket.receive_text()

            if data == "ping":
                await websocket.send_text("pong")
            elif data == "stop":
                # Client wants to stop receiving screenshots
                break

    except Exception:
        # Covers client disconnects as well as unexpected receive/send
        # errors; either way we fall through to the cleanup below.
        pass
    finally:
        # Run cleanup on every exit path. Previously the "stop" path left
        # the socket registered with the manager.
        await manager.unsubscribe_screenshots(websocket, queue_id)
        manager.disconnect(websocket, user_id)
|
||||
```
|
||||
|
||||
### 4. Frontend Implementation
|
||||
|
||||
**Screenshot Viewer Component:**
|
||||
```vue
|
||||
<!-- frontend/src/components/LiveScreenshotViewer.vue -->
|
||||
<template>
|
||||
<div class="screenshot-viewer">
|
||||
<v-card>
|
||||
<v-card-title>
|
||||
Live Scraper View - {{ platform }}
|
||||
<v-spacer></v-spacer>
|
||||
<v-chip :color="isLive ? 'success' : 'grey'" small>
|
||||
<v-icon small left>{{ isLive ? 'mdi-circle' : 'mdi-circle-outline' }}</v-icon>
|
||||
{{ isLive ? 'LIVE' : 'Offline' }}
|
||||
</v-chip>
|
||||
</v-card-title>
|
||||
|
||||
<v-card-text>
|
||||
<!-- Screenshot Display -->
|
||||
<div class="screenshot-container" v-if="screenshot">
|
||||
<img
|
||||
:src="`data:image/jpeg;base64,${screenshot}`"
|
||||
alt="Live scraper screenshot"
|
||||
class="screenshot-image"
|
||||
/>
|
||||
|
||||
<!-- Action Overlay -->
|
||||
<div class="action-overlay">
|
||||
<v-chip color="primary" dark>
|
||||
{{ currentAction }}
|
||||
</v-chip>
|
||||
</div>
|
||||
|
||||
<!-- Timestamp -->
|
||||
<div class="timestamp-overlay">
|
||||
Updated {{ timeSince }} ago
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Placeholder when no screenshot -->
|
||||
<div v-else class="screenshot-placeholder">
|
||||
<v-icon size="64" color="grey lighten-2">mdi-camera-off</v-icon>
|
||||
<div class="mt-4">Waiting for scraper to start...</div>
|
||||
</div>
|
||||
</v-card-text>
|
||||
|
||||
<v-card-actions>
|
||||
<v-btn
|
||||
:color="enabled ? 'error' : 'success'"
|
||||
@click="toggleScreenshots"
|
||||
outlined
|
||||
small
|
||||
>
|
||||
<v-icon left small>
|
||||
{{ enabled ? 'mdi-pause' : 'mdi-play' }}
|
||||
</v-icon>
|
||||
{{ enabled ? 'Pause Screenshots' : 'Resume Screenshots' }}
|
||||
</v-btn>
|
||||
|
||||
<v-btn
|
||||
color="primary"
|
||||
@click="downloadScreenshot"
|
||||
:disabled="!screenshot"
|
||||
outlined
|
||||
small
|
||||
>
|
||||
<v-icon left small>mdi-download</v-icon>
|
||||
Save Screenshot
|
||||
</v-btn>
|
||||
|
||||
<v-spacer></v-spacer>
|
||||
|
||||
<v-chip small outlined>
|
||||
FPS: {{ fps }}
|
||||
</v-chip>
|
||||
</v-card-actions>
|
||||
</v-card>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import { ref, computed, watch, onMounted, onUnmounted } from 'vue';
import websocketService from '@/services/websocket';

/**
 * LiveScreenshotViewer
 *
 * Shows the most recent screenshot streamed for one queue item, together
 * with the scraper's current action, the age of the frame and a simple
 * frames-per-second counter.
 *
 * Props:
 *   queueId  - queue item whose screenshots should be displayed
 *   platform - platform name shown in the card title
 */
export default {
  name: 'LiveScreenshotViewer',
  props: {
    queueId: {
      type: Number,
      required: true
    },
    platform: {
      type: String,
      required: true
    }
  },
  setup(props) {
    const screenshot = ref(null);
    const currentAction = ref('Initializing...');
    const lastUpdate = ref(null);
    const enabled = ref(true);
    const isLive = ref(false);
    const fps = ref(0);
    // Reactive clock, ticked once per second below. Date.now() alone is not
    // reactive, so without it "Updated Xs ago" would freeze between frames.
    const now = ref(Date.now());

    let wsConnection = null;
    let frameCount = 0;
    let fpsInterval = null;

    const timeSince = computed(() => {
      if (!lastUpdate.value) return 'never';
      // Clamp at 0: a frame may arrive slightly after the last clock tick.
      const seconds = Math.max(
        0,
        Math.floor((now.value - lastUpdate.value) / 1000)
      );
      if (seconds < 60) return `${seconds}s`;
      return `${Math.floor(seconds / 60)}m`;
    });

    const connectWebSocket = () => {
      wsConnection = websocketService.connectScreenshots(props.queueId);

      wsConnection.on('scraper_screenshot', (data) => {
        // While paused, incoming frames are simply ignored client-side.
        if (enabled.value) {
          screenshot.value = data.screenshot;
          currentAction.value = data.action;
          lastUpdate.value = Date.now();
          isLive.value = true;
          frameCount++;
        }
      });

      wsConnection.on('download_completed', () => {
        isLive.value = false;
        currentAction.value = 'Download completed';
      });

      wsConnection.on('download_failed', () => {
        isLive.value = false;
        currentAction.value = 'Download failed';
      });
    };

    const disconnectWebSocket = () => {
      if (wsConnection) {
        wsConnection.send('stop');
        wsConnection.disconnect();
        wsConnection = null;
      }
    };

    const toggleScreenshots = () => {
      enabled.value = !enabled.value;
      if (!enabled.value) {
        isLive.value = false;
      }
    };

    const downloadScreenshot = () => {
      if (!screenshot.value) return;

      const link = document.createElement('a');
      link.href = `data:image/jpeg;base64,${screenshot.value}`;
      link.download = `screenshot_${props.queueId}_${Date.now()}.jpg`;
      link.click();
    };

    // The parent may reuse this component for a different queue item, so
    // switch the subscription and clear stale state when the id changes.
    watch(
      () => props.queueId,
      () => {
        disconnectWebSocket();
        screenshot.value = null;
        currentAction.value = 'Initializing...';
        lastUpdate.value = null;
        isLive.value = false;
        connectWebSocket();
      }
    );

    onMounted(() => {
      connectWebSocket();

      // Once per second: publish the frame count as FPS and tick the clock.
      fpsInterval = setInterval(() => {
        fps.value = frameCount;
        frameCount = 0;
        now.value = Date.now();
      }, 1000);
    });

    onUnmounted(() => {
      disconnectWebSocket();
      clearInterval(fpsInterval);
    });

    return {
      screenshot,
      currentAction,
      timeSince,
      enabled,
      isLive,
      fps,
      toggleScreenshots,
      downloadScreenshot
    };
  }
};
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.screenshot-viewer {
|
||||
margin: 16px 0;
|
||||
}
|
||||
|
||||
.screenshot-container {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
background: #000;
|
||||
border-radius: 4px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.screenshot-image {
|
||||
width: 100%;
|
||||
height: auto;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.action-overlay {
|
||||
position: absolute;
|
||||
top: 16px;
|
||||
left: 16px;
|
||||
z-index: 10;
|
||||
}
|
||||
|
||||
.timestamp-overlay {
|
||||
position: absolute;
|
||||
bottom: 16px;
|
||||
right: 16px;
|
||||
background: rgba(0, 0, 0, 0.7);
|
||||
color: white;
|
||||
padding: 4px 8px;
|
||||
border-radius: 4px;
|
||||
font-size: 12px;
|
||||
z-index: 10;
|
||||
}
|
||||
|
||||
.screenshot-placeholder {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
min-height: 400px;
|
||||
background: #f5f5f5;
|
||||
border-radius: 4px;
|
||||
color: #999;
|
||||
}
|
||||
</style>
|
||||
```
|
||||
|
||||
**WebSocket Service Enhancement:**
|
||||
```javascript
|
||||
// frontend/src/services/websocket.js
|
||||
class WebSocketClient {
  // ... existing code ...

  /**
   * Open a screenshot stream for one queue item.
   *
   * @param {number} queueId - queue item to subscribe to
   * @param {string} [baseUrl='ws://localhost:8000'] - WebSocket origin of the
   *   backend; override for non-local deployments (use wss:// behind HTTPS)
   * @returns {{on: Function, send: Function, disconnect: Function}} handle
   *   for registering typed listeners, sending control messages and closing
   */
  connectScreenshots(queueId, baseUrl = 'ws://localhost:8000') {
    const token = localStorage.getItem('access_token');
    // Encode the token so characters such as "+" or "&" cannot corrupt the
    // query string.
    const ws = new WebSocket(
      `${baseUrl}/ws/screenshots/${queueId}?token=${encodeURIComponent(token)}`
    );

    const listeners = new Map();

    ws.onmessage = (event) => {
      let message;
      try {
        message = JSON.parse(event.data);
      } catch (err) {
        // Non-JSON frames (e.g. the plain-text "pong" keep-alive reply)
        // are not application messages; ignore them.
        return;
      }
      this.notifyListeners(listeners, message);
    };

    return {
      on: (type, callback) => {
        if (!listeners.has(type)) {
          listeners.set(type, []);
        }
        listeners.get(type).push(callback);
      },
      send: (message) => {
        // Silently drop messages unless the socket is open.
        if (ws.readyState === WebSocket.OPEN) {
          ws.send(message);
        }
      },
      disconnect: () => {
        ws.close();
      }
    };
  }

  /**
   * Dispatch a parsed message to the callbacks registered for its `type`.
   *
   * Callbacks receive `message.data` when the message uses an envelope, and
   * the message itself otherwise. Screenshot messages are sent flat
   * (`screenshot`, `action`, ... at the top level), so they are delivered
   * whole.
   */
  notifyListeners(listeners, message) {
    if (!message || typeof message !== 'object') return;

    const { type, data } = message;
    const payload = data !== undefined ? data : message;
    const callbacks = listeners.get(type);
    if (callbacks) {
      callbacks.forEach((callback) => callback(payload));
    }
  }
}
|
||||
```
|
||||
|
||||
**Usage in Queue Manager:**
|
||||
```vue
|
||||
<!-- frontend/src/views/QueueManager.vue -->
|
||||
<template>
|
||||
<v-container>
|
||||
<v-row>
|
||||
<!-- Queue List -->
|
||||
<v-col cols="12" md="6">
|
||||
<v-card>
|
||||
<v-card-title>Download Queue</v-card-title>
|
||||
<v-list>
|
||||
<v-list-item
|
||||
v-for="item in queueItems"
|
||||
:key="item.id"
|
||||
@click="selectedQueueId = item.id"
|
||||
:class="{ 'selected': selectedQueueId === item.id }"
|
||||
>
|
||||
<!-- Queue item details -->
|
||||
</v-list-item>
|
||||
</v-list>
|
||||
</v-card>
|
||||
</v-col>
|
||||
|
||||
<!-- Live Screenshot Viewer -->
|
||||
<v-col cols="12" md="6">
|
||||
<LiveScreenshotViewer
|
||||
v-if="selectedQueueId"
|
||||
:queue-id="selectedQueueId"
|
||||
:platform="selectedItem.platform"
|
||||
/>
|
||||
</v-col>
|
||||
</v-row>
|
||||
</v-container>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import LiveScreenshotViewer from '@/components/LiveScreenshotViewer.vue';
|
||||
|
||||
export default {
|
||||
components: {
|
||||
LiveScreenshotViewer
|
||||
},
|
||||
// ... rest of component
|
||||
};
|
||||
</script>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Optimizations
|
||||
|
||||
### 1. Screenshot Quality & Size Control
|
||||
|
||||
```python
|
||||
# Adjustable quality based on bandwidth
|
||||
screenshot_bytes = page.screenshot(
|
||||
type='jpeg',
|
||||
quality=60, # 60% quality = smaller size
|
||||
full_page=False # Only visible area
|
||||
)
|
||||
```
|
||||
|
||||
### 2. Frame Rate Limiting
|
||||
|
||||
```python
|
||||
# Only send screenshot every 2-3 seconds, not every action
|
||||
last_screenshot_time = 0
|
||||
screenshot_interval = 2.0 # seconds
|
||||
|
||||
async def _capture_screenshot_throttled(self, page, action: str):
|
||||
current_time = time.time()
|
||||
if current_time - self.last_screenshot_time >= self.screenshot_interval:
|
||||
await self._capture_screenshot(page, action)
|
||||
self.last_screenshot_time = current_time
|
||||
```
|
||||
|
||||
### 3. Client-Side Caching
|
||||
|
||||
```javascript
|
||||
// Only update DOM if screenshot actually changed
|
||||
const screenshotHash = simpleHash(data.screenshot);
|
||||
if (screenshotHash !== lastScreenshotHash.value) {
|
||||
screenshot.value = data.screenshot;
|
||||
lastScreenshotHash.value = screenshotHash;
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Opt-in Feature
|
||||
|
||||
```python
|
||||
# Only capture screenshots if client is subscribed
|
||||
if len(self.screenshot_subscribers.get(queue_id, [])) > 0:
|
||||
await self._capture_screenshot(page, action)
|
||||
# Otherwise skip to save resources
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## User Settings
|
||||
|
||||
**Add to Settings Page:**
|
||||
```jsonc
|
||||
{
|
||||
"live_screenshots": {
|
||||
"enabled": true,
|
||||
"quality": 60,
|
||||
"frame_rate": 0.5, // screenshots per second
|
||||
"auto_enable": false // enable by default for new downloads
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Benefits
|
||||
|
||||
1. **Visual Debugging** - See exactly what's happening during scraping
|
||||
2. **Confidence** - Know the scraper is working correctly
|
||||
3. **Entertainment** - Watch downloads happen in real-time
|
||||
4. **Troubleshooting** - Immediately spot issues (CAPTCHA, layout changes)
|
||||
5. **Learning** - Understand how scrapers navigate sites
|
||||
|
||||
---
|
||||
|
||||
## Bandwidth Considerations
|
||||
|
||||
**Typical Screenshot:**
|
||||
- Size: 50-150 KB (JPEG 60% quality)
|
||||
- Frequency: 0.5 FPS (1 screenshot every 2 seconds)
|
||||
- Bandwidth: ~25-75 KB/s per active download
|
||||
|
||||
**With 4 concurrent downloads:**
|
||||
- Total: ~100-300 KB/s = 0.8-2.4 Mbps
|
||||
|
||||
This is very reasonable for modern internet connections.
|
||||
|
||||
---
|
||||
|
||||
## Advanced Features (Future)
|
||||
|
||||
### 1. Element Highlighting
|
||||
```python
|
||||
# Highlight the element being scraped
|
||||
await page.evaluate("""
|
||||
(selector) => {
|
||||
const element = document.querySelector(selector);
|
||||
if (element) {
|
||||
element.style.outline = '3px solid red';
|
||||
}
|
||||
}
|
||||
""", current_selector)
|
||||
|
||||
# Then take screenshot
|
||||
screenshot = await page.screenshot()
|
||||
```
|
||||
|
||||
### 2. Recording Mode
|
||||
```bash
|
||||
# Option to save all screenshots as video
|
||||
ffmpeg -framerate 0.5 -i screenshot_%04d.jpg -c:v libx264 scraping_video.mp4
|
||||
```
|
||||
|
||||
### 3. Comparison Mode
|
||||
```html
|
||||
<!-- Show before/after for quality upgrade -->
|
||||
<div class="comparison">
|
||||
<img src="fastdl_screenshot" label="FastDL (640x640)" />
|
||||
<img src="toolzu_screenshot" label="Toolzu (1920x1440)" />
|
||||
</div>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Priority
|
||||
|
||||
This feature should be added in **Phase 4 (Advanced Features)** since it's not critical for core functionality but provides excellent user experience.
|
||||
|
||||
**Estimated Development Time:** 3-4 days
|
||||
- Backend: 1 day
|
||||
- Frontend component: 1 day
|
||||
- WebSocket integration: 1 day
|
||||
- Testing & optimization: 1 day
|
||||
Reference in New Issue
Block a user