diff --git a/apps/chat/src/index.ts b/apps/chat/src/index.ts index bee4c06..feaed8a 100644 --- a/apps/chat/src/index.ts +++ b/apps/chat/src/index.ts @@ -12,7 +12,12 @@ import { recordChatError, recordChatModerationBlock, recordDeliveredChatMessage, + recordDeliveredChatMessageBytes, + recordHistoryMessagesLoaded, recordIncomingChatMessage, + recordUniqueChatter, + setChannelHistorySize, + setChatModerationState, startChatMessageTimer, } from './metrics.js'; import { getPersonalChannel } from './utils/personalChannel.js'; @@ -476,8 +481,17 @@ app.get( socketState.isModerator = isModerator; socket.metricsTracked = true; socketState.metricsTracked = true; + socket.metricsAuthMethod = authMethod; + socketState.metricsAuthMethod = authMethod; - recordChatConnectionAccepted(authMethod); + recordChatConnectionAccepted(username, authMethod); + setChatModerationState(username, { + blockedTerms: moderationSettings.blockedTerms.length, + maxMessageLength: moderationSettings.maxMessageLength, + rateLimitCount: moderationSettings.rateLimitCount, + rateLimitWindowSeconds: moderationSettings.rateLimitWindowSeconds, + slowModeSeconds: moderationSettings.slowModeSeconds, + }); socket.send( JSON.stringify({ @@ -507,6 +521,7 @@ app.get( const messages = await redis.zrange(channelKey, 0, MESSAGE_HISTORY_SIZE - 1); if (messages.length > 0) { + recordHistoryMessagesLoaded(username, messages.length); socket.send( JSON.stringify({ type: 'history', @@ -514,6 +529,7 @@ app.get( }) ); } + setChannelHistorySize(username, messages.length); }, async onClose(evt, ws) { const socket = ws as unknown as ChatSocket; @@ -522,7 +538,10 @@ app.get( if (!socketState.targetUsername) return; if (socketState.metricsTracked) { - recordChatDisconnect(); + recordChatDisconnect( + socketState.targetUsername, + socketState.metricsAuthMethod ?? 'unknown' + ); socketState.metricsTracked = false; } @@ -547,9 +566,10 @@ app.get( try { const socket = ws as unknown as ChatSocket; const socketState = resolveSocketState(socket); - const msg = JSON.parse(evt.data.toString()) as IncomingMessage; + const rawPayload = evt.data.toString(); + const msg = JSON.parse(rawPayload) as IncomingMessage; messageType = typeof msg.type === 'string' ? msg.type : 'unknown'; - recordIncomingChatMessage(messageType); + recordIncomingChatMessage(messageType, Buffer.byteLength(rawPayload)); stopTimer = startChatMessageTimer(messageType); if (msg.type === 'ping') { @@ -707,9 +727,16 @@ app.get( await redis.zadd(channelKey, Date.now(), redisStr); await redis.zremrangebyrank(channelKey, 0, -MESSAGE_HISTORY_SIZE - 1); await redis.expire(channelKey, MESSAGE_TTL); + const historySize = await redis.zcard(channelKey); + setChannelHistorySize(targetUsername, historySize); broadcastToChannel(targetUsername, socket, msgObj as unknown as Record); recordDeliveredChatMessage(chatUser.isBot ? 'bot' : 'user'); + recordDeliveredChatMessageBytes( + chatUser.isBot ? 'bot' : 'user', + Buffer.byteLength(message) + ); + recordUniqueChatter(chatUser.isBot ? 'bot' : 'user'); outcome = 'broadcast'; } if (msg.type === 'emojiMsg') { diff --git a/apps/chat/src/metrics.ts b/apps/chat/src/metrics.ts index c21f277..804498c 100644 --- a/apps/chat/src/metrics.ts +++ b/apps/chat/src/metrics.ts @@ -15,6 +15,20 @@ function createMetricsStore() { registers: [register], }); + const websocketConnectionsByChannel = new Gauge({ + name: 'hctv_chat_websocket_connections_by_channel', + help: 'Current number of active chat websocket connections by target channel.', + labelNames: ['channel'], + registers: [register], + }); + + const websocketConnectionsByAuthMethod = new Gauge({ + name: 'hctv_chat_websocket_connections_by_auth_method', + help: 'Current number of active chat websocket connections by auth method.', + labelNames: ['auth_method'], + registers: [register], + }); + const websocketConnectionAttempts = new Counter({ name: 'hctv_chat_websocket_connection_attempts_total', help: 'Total websocket connection attempts grouped by outcome and auth method.', @@ -29,6 +43,13 @@ function createMetricsStore() { registers: [register], }); + const inboundPayloadBytes = new Counter({ + name: 'hctv_chat_inbound_payload_bytes_total', + help: 'Total inbound websocket payload bytes grouped by message type.', + labelNames: ['type'], + registers: [register], + }); + const messageDuration = new Histogram({ name: 'hctv_chat_message_duration_seconds', help: 'Chat websocket message processing time in seconds.', @@ -44,6 +65,41 @@ function createMetricsStore() { registers: [register], }); + const deliveredMessageBytes = new Counter({ + name: 'hctv_chat_message_bytes_delivered_total', + help: 'Total message body bytes successfully broadcast, grouped by sender type.', + labelNames: ['sender_type'], + registers: [register], + }); + + const channelHistorySize = new Gauge({ + name: 'hctv_chat_channel_history_size', + help: 'Current number of messages retained in Redis history for a channel.', + labelNames: ['channel'], + registers: [register], + }); + + const channelHistoryLoadedMessages = new Counter({ + name: 'hctv_chat_history_messages_loaded_total', + help: 'Total history messages loaded from Redis during websocket joins.', + labelNames: ['channel'], + registers: [register], + }); + + const moderationState = new Gauge({ + name: 'hctv_chat_moderation_state', + help: 'Current moderation settings by channel.', + labelNames: ['channel', 'setting'], + registers: [register], + }); + + const channelUniqueChatters = new Counter({ + name: 'hctv_chat_unique_chatters_total', + help: 'Users who successfully sent at least one chat message, grouped by sender type.', + labelNames: ['sender_type'], + registers: [register], + }); + const moderationActions = new Counter({ name: 'hctv_chat_moderation_actions_total', help: 'Successful moderation actions performed in chat.', @@ -67,14 +123,22 @@ function createMetricsStore() { return { deliveredMessages, + deliveredMessageBytes, + channelHistoryLoadedMessages, + channelHistorySize, errors, + inboundPayloadBytes, incomingMessages, messageDuration, moderationActions, moderationBlocks, + moderationState, register, + channelUniqueChatters, websocketConnectionAttempts, websocketConnections, + websocketConnectionsByAuthMethod, + websocketConnectionsByChannel, }; } @@ -86,21 +150,26 @@ const metrics = (globalForMetrics.__hctvChatMetrics ??= createMetricsStore()); export const chatMetricsRegistry = metrics.register; -export function recordChatConnectionAccepted(authMethod: string): void { +export function recordChatConnectionAccepted(channel: string, authMethod: string): void { metrics.websocketConnectionAttempts.inc({ auth_method: authMethod, outcome: 'accepted' }); metrics.websocketConnections.inc(); + metrics.websocketConnectionsByChannel.inc({ channel }); + metrics.websocketConnectionsByAuthMethod.inc({ auth_method: authMethod }); } export function recordChatConnectionRejected(authMethod: string): void { metrics.websocketConnectionAttempts.inc({ auth_method: authMethod, outcome: 'rejected' }); } -export function recordChatDisconnect(): void { +export function recordChatDisconnect(channel: string, authMethod: string): void { metrics.websocketConnections.dec(); + metrics.websocketConnectionsByChannel.dec({ channel }); + metrics.websocketConnectionsByAuthMethod.dec({ auth_method: authMethod }); } -export function recordIncomingChatMessage(type: string): void { +export function recordIncomingChatMessage(type: string, payloadBytes: number): void { metrics.incomingMessages.inc({ type }); + metrics.inboundPayloadBytes.inc({ type }, payloadBytes); } export function startChatMessageTimer(type: string) { @@ -111,6 +180,47 @@ export function recordDeliveredChatMessage(senderType: string): void { metrics.deliveredMessages.inc({ sender_type: senderType }); } +export function recordDeliveredChatMessageBytes(senderType: string, bytes: number): void { + metrics.deliveredMessageBytes.inc({ sender_type: senderType }, bytes); +} + +export function setChannelHistorySize(channel: string, size: number): void { + metrics.channelHistorySize.set({ channel }, size); +} + +export function recordHistoryMessagesLoaded(channel: string, count: number): void { + if (count > 0) { + metrics.channelHistoryLoadedMessages.inc({ channel }, count); + } +} + +export function setChatModerationState( + channel: string, + settings: { + blockedTerms: number; + maxMessageLength: number; + rateLimitCount: number; + rateLimitWindowSeconds: number; + slowModeSeconds: number; + } +): void { + metrics.moderationState.set({ channel, setting: 'blocked_terms' }, settings.blockedTerms); + metrics.moderationState.set({ channel, setting: 'slow_mode_seconds' }, settings.slowModeSeconds); + metrics.moderationState.set( + { channel, setting: 'max_message_length' }, + settings.maxMessageLength + ); + metrics.moderationState.set({ channel, setting: 'rate_limit_count' }, settings.rateLimitCount); + metrics.moderationState.set( + { channel, setting: 'rate_limit_window_seconds' }, + settings.rateLimitWindowSeconds + ); +} + +export function recordUniqueChatter(senderType: string): void { + metrics.channelUniqueChatters.inc({ sender_type: senderType }); +} + export function recordChatModerationAction(action: string): void { metrics.moderationActions.inc({ action }); } diff --git a/apps/chat/src/types/chat.ts b/apps/chat/src/types/chat.ts index bb9714d..6e7694c 100644 --- a/apps/chat/src/types/chat.ts +++ b/apps/chat/src/types/chat.ts @@ -40,6 +40,7 @@ export interface ChatSocket { viewerId?: string; isModerator?: boolean; metricsTracked?: boolean; + metricsAuthMethod?: string; raw?: | (ModifiedWebSocket & { targetUsername?: string; @@ -48,6 +49,7 @@ export interface ChatSocket { personalChannel?: any; isModerator?: boolean; metricsTracked?: boolean; + metricsAuthMethod?: string; }) | null; } diff --git a/apps/web/src/lib/instrumentation/getLiveThumb.ts b/apps/web/src/lib/instrumentation/getLiveThumb.ts index a354b7f..8e18252 100644 --- a/apps/web/src/lib/instrumentation/getLiveThumb.ts +++ b/apps/web/src/lib/instrumentation/getLiveThumb.ts @@ -1,5 +1,5 @@ import { prisma } from '@hctv/db'; -import { recordThumbnailJobsEnqueued, trackWebJob } from '../metrics'; +import { recordThumbnailJobsEnqueued, setThumbnailRefreshTargets, trackWebJob } from '../metrics'; import { getThumbnailQueue } from '../workers'; export default async function getLiveThumb() { @@ -15,6 +15,8 @@ export default async function getLiveThumb() { const thumbQueue = getThumbnailQueue(); const jobsByRegion: Record = {}; + setThumbnailRefreshTargets(liveChannels.length); + for (const liveChannel of liveChannels) { await thumbQueue.add('getLiveThumb', { name: liveChannel.channel.name, diff --git a/apps/web/src/lib/instrumentation/streamInfo.ts b/apps/web/src/lib/instrumentation/streamInfo.ts index c3aa292..cdfb8c2 100644 --- a/apps/web/src/lib/instrumentation/streamInfo.ts +++ b/apps/web/src/lib/instrumentation/streamInfo.ts @@ -1,5 +1,13 @@ import { prisma } from '@hctv/db'; -import { setLiveStreamsByRegion, trackWebJob } from '../metrics'; +import { + recordLiveStreamTransition, + recordNotificationsEnqueued, + recordStreamSyncScrape, + setLiveStreamsByRegion, + setPlatformInventory, + setStreamPathsByRegion, + trackWebJob, +} from '../metrics'; import { HttpFlv } from '../types/liveBackendJson'; import { getNotificationQueue } from '../workers'; import client from '../services/slackNotifier'; @@ -11,11 +19,30 @@ export default async function runner() { if ((await prisma.user.count()) === 0) { return; } + await refreshPlatformInventory(); await initializeStreamInfo(); await syncStream(); setInterval(syncStream, 5000); } +async function refreshPlatformInventory() { + const [channels, liveStreams, follows, botAccounts, users] = await Promise.all([ + prisma.channel.count(), + prisma.streamInfo.count({ where: { isLive: true } }), + prisma.follow.count(), + prisma.botAccount.count(), + prisma.user.count(), + ]); + + setPlatformInventory({ + bot_accounts: botAccounts, + channels, + follows, + live_stream_rows: liveStreams, + users, + }); +} + export async function initializeStreamInfo(channelId?: string) { const channels = await prisma.channel.findMany({ where: { @@ -58,18 +85,22 @@ export async function syncStream() { const allActiveStreams = new Map(); const liveStreamsByRegion = Object.fromEntries(regions.map((region) => [region, 0])); + const pathsSeenByRegion = Object.fromEntries(regions.map((region) => [region, 0])); for (const r of regions) { const region = MEDIAMTX_SERVER_REGIONS[r]; const response = await fetch(`${region.apiUrl}/v3/paths/list?itemsPerPage=1000`); if (!response.ok) { + recordStreamSyncScrape(r, 'error'); console.error( `Failed to fetch ${r} stream stats: ${response.status} ${response.statusText}` ); continue; } + recordStreamSyncScrape(r, 'success'); + type ResponseType = paths['/v3/paths/list']['get']['responses']['200']['content']['application/json']; const data = (await response.json()) as ResponseType; @@ -79,12 +110,14 @@ export async function syncStream() { if (stream.ready && stream.name) { allActiveStreams.set(stream.name, r); liveStreamsByRegion[r] += 1; + pathsSeenByRegion[r] += 1; } } } } setLiveStreamsByRegion(liveStreamsByRegion); + setStreamPathsByRegion(pathsSeenByRegion); const currentLiveStreams = await prisma.streamInfo.findMany({ where: { isLive: true }, @@ -92,6 +125,7 @@ export async function syncStream() { for (const dbStream of currentLiveStreams) { if (!allActiveStreams.has(dbStream.username)) { + recordLiveStreamTransition('offline', dbStream.streamRegion); await prisma.streamInfo.update({ where: { username: dbStream.username }, data: { @@ -111,6 +145,7 @@ export async function syncStream() { if (existingStream && !existingStream.isLive) { console.log(`Stream ${username} is now live in region ${regionKey}`); + recordLiveStreamTransition('online', regionKey); await prisma.streamInfo.update({ where: { username }, data: { @@ -131,7 +166,6 @@ export async function syncStream() { }); const queue = getNotificationQueue(); - if (!existingStream.channel.is247) { queue.add(`streamStartChannel:${existingStream.username}`, { text: `${existingStream.username} is now *live*, streaming *${existingStream.title}* (${existingStream.category})!\n`, @@ -149,8 +183,18 @@ export async function syncStream() { }); } } + + recordNotificationsEnqueued('channel', existingStream.channel.is247 ? 0 : 1); + recordNotificationsEnqueued( + 'dm', + existingStream.enableNotifications && !existingStream.channel.is247 + ? subscribedFollowers.length + : 0 + ); } } + + await refreshPlatformInventory(); }); } catch (error) { console.error('Error syncing stream status:', error); diff --git a/apps/web/src/lib/instrumentation/syncStreamKeys.ts b/apps/web/src/lib/instrumentation/syncStreamKeys.ts index 45b6e17..b240ebd 100644 --- a/apps/web/src/lib/instrumentation/syncStreamKeys.ts +++ b/apps/web/src/lib/instrumentation/syncStreamKeys.ts @@ -1,5 +1,5 @@ import { prisma, getRedisConnection } from '@hctv/db'; -import { trackWebJob } from '../metrics'; +import { setCacheEntryCount, trackWebJob } from '../metrics'; export default async function syncStreamKeys() { console.log('Syncing stream keys to Redis...'); @@ -26,6 +26,7 @@ export default async function syncStreamKeys() { } await pipeline.exec(); + setCacheEntryCount('stream_keys', keys.length); console.log(`Synced ${keys.length} stream keys to Redis`); }); } catch (error) { diff --git a/apps/web/src/lib/instrumentation/viewerCountSync.ts b/apps/web/src/lib/instrumentation/viewerCountSync.ts index 28f0939..5db888a 100644 --- a/apps/web/src/lib/instrumentation/viewerCountSync.ts +++ b/apps/web/src/lib/instrumentation/viewerCountSync.ts @@ -14,7 +14,13 @@ export async function viewerCountSync() { }); if (streams.length === 0) { - setViewerSnapshot(0, 0); + setViewerSnapshot({ + hottestStreamViewers: 0, + streamsWithViewers: 0, + totalViewers: 0, + trackedStreams: 0, + viewersByRegion: {}, + }); return; } @@ -25,12 +31,23 @@ export async function viewerCountSync() { } const results = await multi.exec(); let totalViewers = 0; + let streamsWithViewers = 0; + let hottestStreamViewers = 0; + const viewersByRegion: Record = {}; await prisma.$transaction(async (tx) => { const updates = results?.map((res, index) => { const count = Array.isArray(res[1]) ? res[1].length : 0; totalViewers += count; + if (count > 0) { + streamsWithViewers += 1; + } + if (count > hottestStreamViewers) { + hottestStreamViewers = count; + } const stream = streams[index]; + viewersByRegion[stream.streamRegion] = + (viewersByRegion[stream.streamRegion] ?? 0) + count; return tx.streamInfo.update({ where: { username: stream.username, @@ -43,7 +60,13 @@ export async function viewerCountSync() { await Promise.all(updates || []); }); - setViewerSnapshot(totalViewers, streams.length); + setViewerSnapshot({ + hottestStreamViewers, + streamsWithViewers, + totalViewers, + trackedStreams: streams.length, + viewersByRegion, + }); }); } catch (error) { console.error('Error syncing viewer counts:', error); diff --git a/apps/web/src/lib/instrumentation/writeSessions.ts b/apps/web/src/lib/instrumentation/writeSessions.ts index f31bfb6..585a05e 100644 --- a/apps/web/src/lib/instrumentation/writeSessions.ts +++ b/apps/web/src/lib/instrumentation/writeSessions.ts @@ -1,5 +1,5 @@ import { getRedisConnection, prisma } from '@hctv/db'; -import { trackWebJob } from '../metrics'; +import { setCacheEntryCount, trackWebJob } from '../metrics'; export default async function writeSessions() { return trackWebJob('write_sessions', async () => { @@ -13,6 +13,7 @@ export default async function writeSessions() { multi.set(`sessions:${sessionId}`, ''); } await multi.exec(); + setCacheEntryCount('sessions', sessionIds.length); console.log('Sessions written to Redis'); }); diff --git a/apps/web/src/lib/metrics.ts b/apps/web/src/lib/metrics.ts index 797e69d..06c9748 100644 --- a/apps/web/src/lib/metrics.ts +++ b/apps/web/src/lib/metrics.ts @@ -31,18 +31,58 @@ function createMetricsStore() { registers: [register], }); + const streamPathsSeen = new Gauge({ + name: 'hctv_web_stream_paths_seen', + help: 'Current number of ready MediaMTX paths seen during the latest sync.', + labelNames: ['region'], + registers: [register], + }); + + const liveStreamTransitions = new Counter({ + name: 'hctv_web_live_stream_transitions_total', + help: 'Live stream state transitions observed by the web app.', + labelNames: ['transition', 'region'], + registers: [register], + }); + + const streamSyncScrapes = new Counter({ + name: 'hctv_web_stream_sync_scrapes_total', + help: 'MediaMTX region scrapes attempted by stream sync.', + labelNames: ['region', 'status'], + registers: [register], + }); + const activeViewers = new Gauge({ name: 'hctv_web_active_viewers', help: 'Current number of active viewers across all live streams.', registers: [register], }); + const activeViewersByRegion = new Gauge({ + name: 'hctv_web_active_viewers_by_region', + help: 'Current number of active viewers grouped by stream region.', + labelNames: ['region'], + registers: [register], + }); + const viewerCountTrackedStreams = new Gauge({ name: 'hctv_web_viewer_count_tracked_streams', help: 'Number of live streams included in the latest viewer sync.', registers: [register], }); + const streamsWithViewers = new Gauge({ + name: 'hctv_web_streams_with_viewers', + help: 'Current number of live streams with at least one viewer.', + registers: [register], + }); + + const hottestStreamViewers = new Gauge({ + name: 'hctv_web_hottest_stream_viewers', + help: 'Current viewer count of the most watched live stream.', + registers: [register], + }); + const thumbnailJobsEnqueued = new Counter({ name: 'hctv_web_thumbnail_jobs_enqueued_total', help: 'Total thumbnail refresh jobs enqueued by region.', @@ -50,6 +90,33 @@ function createMetricsStore() { registers: [register], }); + const thumbnailRefreshTargets = new Gauge({ + name: 'hctv_web_thumbnail_refresh_targets', + help: 'Number of live streams targeted in the latest thumbnail refresh run.', + registers: [register], + }); + + const notificationsEnqueued = new Counter({ + name: 'hctv_web_notifications_enqueued_total', + help: 'Notification jobs enqueued when streams go live.', + labelNames: ['target'], + registers: [register], + }); + + const cacheEntries = new Gauge({ + name: 'hctv_web_cache_entries', + help: 'Current number of records mirrored into Redis by cache-sync jobs.', + labelNames: ['cache'], + registers: [register], + }); + + const platformInventory = new Gauge({ + name: 'hctv_web_platform_inventory', + help: 'High-level counts of important platform records.', + labelNames: ['entity'], + registers: [register], + }); + const mediamtxAuthRequests = new Counter({ name: 'hctv_web_mediamtx_auth_requests_total', help: 'Total MediaMTX auth decisions handled by the web app.', @@ -68,11 +135,21 @@ function createMetricsStore() { return { register, activeViewers, + activeViewersByRegion, backgroundJobDuration, backgroundJobRuns, + cacheEntries, + hottestStreamViewers, liveStreams, + liveStreamTransitions, mediamtxAuthDuration, mediamtxAuthRequests, + notificationsEnqueued, + platformInventory, + streamPathsSeen, + streamsWithViewers, + streamSyncScrapes, + thumbnailRefreshTargets, thumbnailJobsEnqueued, viewerCountTrackedStreams, }; @@ -109,9 +186,38 @@ export function setLiveStreamsByRegion(streamsByRegion: Record): } } -export function setViewerSnapshot(totalViewers: number, trackedStreams: number): void { - metrics.activeViewers.set(totalViewers); - metrics.viewerCountTrackedStreams.set(trackedStreams); +export function setStreamPathsByRegion(pathsByRegion: Record): void { + metrics.streamPathsSeen.reset(); + + for (const [region, count] of Object.entries(pathsByRegion)) { + metrics.streamPathsSeen.set({ region }, count); + } +} + +export function recordLiveStreamTransition(transition: 'online' | 'offline', region: string): void { + metrics.liveStreamTransitions.inc({ transition, region }); +} + +export function recordStreamSyncScrape(region: string, status: 'success' | 'error'): void { + metrics.streamSyncScrapes.inc({ region, status }); +} + +export function setViewerSnapshot(snapshot: { + totalViewers: number; + trackedStreams: number; + viewersByRegion: Record; + streamsWithViewers: number; + hottestStreamViewers: number; +}): void { + metrics.activeViewers.set(snapshot.totalViewers); + metrics.viewerCountTrackedStreams.set(snapshot.trackedStreams); + metrics.streamsWithViewers.set(snapshot.streamsWithViewers); + metrics.hottestStreamViewers.set(snapshot.hottestStreamViewers); + metrics.activeViewersByRegion.reset(); + + for (const [region, count] of Object.entries(snapshot.viewersByRegion)) { + metrics.activeViewersByRegion.set({ region }, count); + } } export function recordThumbnailJobsEnqueued(jobsByRegion: Record): void { @@ -122,6 +228,28 @@ export function recordThumbnailJobsEnqueued(jobsByRegion: Record } } +export function setThumbnailRefreshTargets(count: number): void { + metrics.thumbnailRefreshTargets.set(count); +} + +export function recordNotificationsEnqueued(target: 'channel' | 'dm', count: number): void { + if (count > 0) { + metrics.notificationsEnqueued.inc({ target }, count); + } +} + +export function setCacheEntryCount(cache: 'sessions' | 'stream_keys', count: number): void { + metrics.cacheEntries.set({ cache }, count); +} + +export function setPlatformInventory(snapshot: Record): void { + metrics.platformInventory.reset(); + + for (const [entity, count] of Object.entries(snapshot)) { + metrics.platformInventory.set({ entity }, count); + } +} + export function recordMediamtxAuth( action: string, protocol: string, diff --git a/observability/grafana/dashboards/hctv-overview.json b/observability/grafana/dashboards/hctv-overview.json index ed6afdb..694eef9 100644 --- a/observability/grafana/dashboards/hctv-overview.json +++ b/observability/grafana/dashboards/hctv-overview.json @@ -56,14 +56,13 @@ "value": 1 } ] - }, - "unit": "none" + } }, "overrides": [] }, "gridPos": { "h": 4, - "w": 4, + "w": 3, "x": 0, "y": 0 }, @@ -80,7 +79,6 @@ }, "textMode": "value" }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "max(up{job=\"web\"})", @@ -131,8 +129,8 @@ }, "gridPos": { "h": 4, - "w": 4, - "x": 4, + "w": 3, + "x": 3, "y": 0 }, "id": 2, @@ -148,7 +146,6 @@ }, "textMode": "value" }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "max(up{job=\"chat\"})", @@ -199,8 +196,8 @@ }, "gridPos": { "h": 4, - "w": 4, - "x": 8, + "w": 3, + "x": 6, "y": 0 }, "id": 3, @@ -216,7 +213,6 @@ }, "textMode": "value" }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "max(up{job=\"mediamtx\"})", @@ -267,8 +263,8 @@ }, "gridPos": { "h": 4, - "w": 4, - "x": 12, + "w": 3, + "x": 9, "y": 0 }, "id": 4, @@ -284,7 +280,6 @@ }, "textMode": "value" }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "max(pg_up)", @@ -335,8 +330,8 @@ }, "gridPos": { "h": 4, - "w": 4, - "x": 16, + "w": 3, + "x": 12, "y": 0 }, "id": 5, @@ -352,7 +347,6 @@ }, "textMode": "value" }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "max(redis_up)", @@ -362,6 +356,132 @@ "title": "Redis", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "targets": [ + { + "expr": "hctv_web_active_viewers", + "refId": "A" + } + ], + "title": "Active Viewers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "targets": [ + { + "expr": "hctv_chat_websocket_connections", + "refId": "A" + } + ], + "title": "Chat Sockets", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "targets": [ + { + "expr": "sum(hctv_web_live_streams)", + "refId": "A" + } + ], + "title": "Live Streams", + "type": "stat" + }, { "datasource": { "type": "prometheus", @@ -382,7 +502,7 @@ "x": 0, "y": 4 }, - "id": 6, + "id": 9, "options": { "legend": { "displayMode": "table", @@ -392,15 +512,19 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "sum by (region) (hctv_web_live_streams)", - "legendFormat": "{{region}}", + "legendFormat": "live {{region}}", "refId": "A" + }, + { + "expr": "sum by (region) (hctv_web_stream_paths_seen)", + "legendFormat": "paths {{region}}", + "refId": "B" } ], - "title": "Live Streams by Region", + "title": "Stream Estate by Region", "type": "timeseries" }, { @@ -423,17 +547,16 @@ "x": 8, "y": 4 }, - "id": 7, + "id": 10, "options": { "legend": { - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "hctv_web_active_viewers", @@ -441,14 +564,59 @@ "refId": "A" }, { - "expr": "hctv_chat_websocket_connections", - "legendFormat": "chat sockets", + "expr": "hctv_web_hottest_stream_viewers", + "legendFormat": "top stream viewers", "refId": "B" + }, + { + "expr": "hctv_web_streams_with_viewers", + "legendFormat": "streams with viewers", + "refId": "C" } ], "title": "Audience Pulse", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 11, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (region) (hctv_web_active_viewers_by_region)", + "legendFormat": "{{region}}", + "refId": "A" + } + ], + "title": "Viewers by Region", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -466,10 +634,10 @@ "gridPos": { "h": 8, "w": 8, - "x": 16, - "y": 4 + "x": 0, + "y": 12 }, - "id": 8, + "id": 12, "options": { "legend": { "displayMode": "table", @@ -479,7 +647,6 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "sum(rate(hctv_chat_messages_delivered_total[5m]))", @@ -488,13 +655,103 @@ }, { "expr": "sum(rate(hctv_chat_incoming_messages_total[5m]))", - "legendFormat": "all inbound frames", + "legendFormat": "inbound frames", "refId": "B" + }, + { + "expr": "sum(rate(hctv_chat_message_bytes_delivered_total[5m]))", + "legendFormat": "message bytes/sec", + "refId": "C" } ], "title": "Chat Throughput", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 13, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (channel) (hctv_chat_websocket_connections_by_channel)", + "legendFormat": "{{channel}}", + "refId": "A" + } + ], + "title": "Socket Load by Channel", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 14, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (auth_method) (hctv_chat_websocket_connections_by_auth_method)", + "legendFormat": "active {{auth_method}}", + "refId": "A" + }, + { + "expr": "sum by (auth_method, outcome) (rate(hctv_chat_websocket_connection_attempts_total[15m]))", + "legendFormat": "{{auth_method}} {{outcome}}", + "refId": "B" + } + ], + "title": "Socket Auth Mix", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -513,9 +770,9 @@ "h": 8, "w": 8, "x": 0, - "y": 12 + "y": 20 }, - "id": 9, + "id": 15, "options": { "legend": { "displayMode": "table", @@ -525,7 +782,6 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "sum by (action) (rate(hctv_chat_moderation_actions_total[15m]))", @@ -534,13 +790,93 @@ }, { "expr": "sum by (reason) (rate(hctv_chat_moderation_blocks_total[15m]))", - "legendFormat": "blocked: {{reason}}", + "legendFormat": "blocked {{reason}}", "refId": "B" } ], "title": "Moderation Load", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 16, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (type) (rate(hctv_chat_inbound_payload_bytes_total[5m]))", + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "title": "Inbound Payload Rate by Type", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 17, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (le, type) (rate(hctv_chat_message_duration_seconds_bucket[15m])))", + "legendFormat": "{{type}} p95", + "refId": "A" + } + ], + "title": "Chat Processing Latency P95", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -558,10 +894,10 @@ "gridPos": { "h": 8, "w": 8, - "x": 8, - "y": 12 + "x": 0, + "y": 28 }, - "id": 10, + "id": 18, "options": { "legend": { "displayMode": "table", @@ -571,7 +907,6 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "sum by (outcome) (rate(hctv_web_mediamtx_auth_requests_total[5m]))", @@ -599,10 +934,90 @@ "gridPos": { "h": 8, "w": 8, - "x": 16, - "y": 12 + "x": 8, + "y": 28 }, - "id": 11, + "id": 19, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (le, action, protocol) (rate(hctv_web_mediamtx_auth_duration_seconds_bucket[15m])))", + "legendFormat": "{{action}}/{{protocol}} p95", + "refId": "A" + } + ], + "title": "Media Auth Latency P95", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 28 + }, + "id": 20, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (job, status) (rate(hctv_web_background_job_runs_total[15m]))", + "legendFormat": "{{job}} {{status}}", + "refId": "A" + } + ], + "title": "Background Job Outcomes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 36 + }, + "id": 21, "options": { "legend": { "displayMode": "table", @@ -612,7 +1027,6 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "histogram_quantile(0.95, sum by (le, job) (rate(hctv_web_background_job_duration_seconds_bucket[15m])))", @@ -640,10 +1054,350 @@ "gridPos": { "h": 8, "w": 8, - "x": 0, - "y": 20 + "x": 8, + "y": 36 }, - "id": 12, + "id": 22, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (target) (rate(hctv_web_notifications_enqueued_total[15m]))", + "legendFormat": "{{target}}", + "refId": "A" + } + ], + "title": "Notifications Enqueued", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 36 + }, + "id": 23, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (cache) (hctv_web_cache_entries)", + "legendFormat": "{{cache}}", + "refId": "A" + }, + { + "expr": "hctv_web_thumbnail_refresh_targets", + "legendFormat": "thumbnail targets", + "refId": "B" + } + ], + "title": "Cache + Thumbnail State", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 44 + }, + "id": 24, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (entity) (hctv_web_platform_inventory)", + "legendFormat": "{{entity}}", + "refId": "A" + } + ], + "title": "Platform Inventory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 44 + }, + "id": 25, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (region, transition) (rate(hctv_web_live_stream_transitions_total[15m]))", + "legendFormat": "{{region}} {{transition}}", + "refId": "A" + }, + { + "expr": "sum by (region, status) (rate(hctv_web_stream_sync_scrapes_total[15m]))", + "legendFormat": "scrape {{region}} {{status}}", + "refId": "B" + } + ], + "title": "Stream Sync Health", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 44 + }, + "id": 26, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (sender_type) (rate(hctv_chat_unique_chatters_total[1h]))", + "legendFormat": "{{sender_type}}", + "refId": "A" + }, + { + "expr": "sum by (channel) (rate(hctv_chat_history_messages_loaded_total[15m]))", + "legendFormat": "history {{channel}}", + "refId": "B" + } + ], + "title": "Chat Participation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 52 + }, + "id": 27, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (channel, setting) (hctv_chat_moderation_state)", + "legendFormat": "{{channel}} {{setting}}", + "refId": "A" + } + ], + "title": "Channel Moderation Settings", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 52 + }, + "id": 28, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (channel) (hctv_chat_channel_history_size)", + "legendFormat": "{{channel}}", + "refId": "A" + } + ], + "title": "Chat History Footprint", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 52 + }, + "id": 29, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "expr": "sum by (phase) (rate(hctv_chat_errors_total[15m]))", + "legendFormat": "chat {{phase}}", + "refId": "A" + }, + { + "expr": "sum by (job, status) (rate(hctv_web_background_job_runs_total{status=\"error\"}[15m]))", + "legendFormat": "web {{job}} errors", + "refId": "B" + } + ], + "title": "Application Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 60 + }, + "id": 30, "options": { "legend": { "displayMode": "table", @@ -653,16 +1407,15 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "pg_stat_database_numbackends{datname=~\"hctv|postgres\"}", - "legendFormat": "database connections", + "legendFormat": "{{datname}} connections", "refId": "A" }, { "expr": "rate(pg_stat_database_xact_commit{datname=~\"hctv|postgres\"}[5m])", - "legendFormat": "commits / sec", + "legendFormat": "{{datname}} commits/sec", "refId": "B" } ], @@ -687,9 +1440,9 @@ "h": 8, "w": 8, "x": 8, - "y": 20 + "y": 60 }, - "id": 13, + "id": 31, "options": { "legend": { "displayMode": "table", @@ -699,7 +1452,6 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "redis_memory_used_bytes", @@ -733,9 +1485,9 @@ "h": 8, "w": 8, "x": 16, - "y": 20 + "y": 60 }, - "id": 14, + "id": 32, "options": { "legend": { "displayMode": "table", @@ -745,7 +1497,6 @@ "mode": "multi" } }, - "pluginVersion": "11.1.0", "targets": [ { "expr": "hctv_web_process_resident_memory_bytes", @@ -782,6 +1533,6 @@ "timezone": "browser", "title": "HackClub.tv Overview", "uid": "hctv-overview", - "version": 1, + "version": 2, "weekStart": "" }