From 527155a0c114935b7f6dc7709e89a1349b668471 Mon Sep 17 00:00:00 2001 From: Izan Gil <66965250+SrIzan10@users.noreply.github.com> Date: Fri, 6 Mar 2026 19:32:12 +0100 Subject: [PATCH] feat(metrics): initial ai impl --- apps/chat/package.json | 3 +- apps/chat/src/index.ts | 69 ++ apps/chat/src/metrics.ts | 124 +++ apps/chat/src/types/chat.ts | 2 + apps/chat/src/utils/moderation.ts | 4 + apps/web/package.json | 1 + .../(protected)/api/mediamtx/publish/route.ts | 54 +- apps/web/src/app/api/metrics/route.ts | 13 + .../src/lib/instrumentation/getLiveThumb.ts | 45 +- .../web/src/lib/instrumentation/streamInfo.ts | 159 ++-- .../src/lib/instrumentation/syncStreamKeys.ts | 41 +- .../lib/instrumentation/viewerCountSync.ts | 83 +- .../src/lib/instrumentation/writeSessions.ts | 30 +- apps/web/src/lib/metrics.ts | 133 +++ apps/web/src/lib/workers/register.ts | 11 +- compose.yml | 38 + dev/docker-compose.yml | 45 +- dev/mediamtx.yml | 2 + docker/mediamtx/mediamtx.yml | 2 + .../grafana/dashboards/hctv-overview.json | 787 ++++++++++++++++++ .../provisioning/dashboards/dashboard.yml | 10 + .../provisioning/datasources/prometheus.yml | 10 + observability/prometheus.dev.yml | 39 + observability/prometheus.yml | 39 + pnpm-lock.yaml | 27 + 25 files changed, 1576 insertions(+), 195 deletions(-) create mode 100644 apps/chat/src/metrics.ts create mode 100644 apps/web/src/app/api/metrics/route.ts create mode 100644 apps/web/src/lib/metrics.ts create mode 100644 observability/grafana/dashboards/hctv-overview.json create mode 100644 observability/grafana/provisioning/dashboards/dashboard.yml create mode 100644 observability/grafana/provisioning/datasources/prometheus.yml create mode 100644 observability/prometheus.dev.yml create mode 100644 observability/prometheus.yml diff --git a/apps/chat/package.json b/apps/chat/package.json index ded6ad1..e1f71c5 100644 --- a/apps/chat/package.json +++ b/apps/chat/package.json @@ -14,7 +14,8 @@ "@hono/node-ws": "^1.1.0", "@leeoniya/ufuzzy": "^1.0.18", "@oslojs/encoding": "^1.1.0", - "hono": "^4.7.5" + "hono": "^4.7.5", + "prom-client": "^15.1.3" }, "devDependencies": { "@types/node": "^20.11.17", diff --git a/apps/chat/src/index.ts b/apps/chat/src/index.ts index 7ceba1b..bee4c06 100644 --- a/apps/chat/src/index.ts +++ b/apps/chat/src/index.ts @@ -4,6 +4,17 @@ import { Hono } from 'hono'; import { readFile } from 'node:fs/promises'; import { lucia } from '@hctv/auth'; import { getCookie } from 'hono/cookie'; +import { + chatMetricsRegistry, + recordChatConnectionAccepted, + recordChatConnectionRejected, + recordChatDisconnect, + recordChatError, + recordChatModerationBlock, + recordDeliveredChatMessage, + recordIncomingChatMessage, + startChatMessageTimer, +} from './metrics.js'; import { getPersonalChannel } from './utils/personalChannel.js'; import { ChatModerationAction, getRedisConnection, prisma } from '@hctv/db'; import uFuzzy from '@leeoniya/ufuzzy'; @@ -268,16 +279,27 @@ app.get('/up', async (c) => { return c.text('it works'); }); +app.get('/metrics', async () => { + return new Response(await chatMetricsRegistry.metrics(), { + headers: { + 'Content-Type': chatMetricsRegistry.contentType, + 'Cache-Control': 'no-store, no-cache, must-revalidate, proxy-revalidate', + }, + }); +}); + app.get( '/ws/:username', upgradeWebSocket((c) => ({ async onOpen(evt, ws) { + let authMethod = 'unknown'; const token = getCookie(c, 'auth_session'); const grant = c.req.query('grant'); const authHeader = c.req.header('Authorization'); const botAuth = c.req.query('botAuth'); if (!token && (!grant || grant === 'null') && !authHeader && !botAuth) { + recordChatConnectionRejected('missing_auth'); ws.close(); return; } @@ -304,6 +326,7 @@ app.get( }); if (botAccount) { + authMethod = 'bot_api_key'; chatUser = { id: botAccount.botAccount.id, username: botAccount.botAccount.slug, @@ -327,6 +350,7 @@ app.get( if (session.user) { const userChannel = await getPersonalChannel(session.user.id); if (userChannel) { + authMethod = 'session'; chatUser = { id: session.user.id, username: userChannel.name, @@ -349,16 +373,22 @@ app.get( : null; if (!chatUser && !dbGrant) { + recordChatConnectionRejected('auth_failed'); ws.close(); return; } const { username } = c.req.param(); if (dbGrant && dbGrant.name !== username) { + recordChatConnectionRejected('grant_mismatch'); ws.close(); return; } + if (!chatUser && dbGrant) { + authMethod = 'obs_grant'; + } + const channel = await prisma.channel.findUnique({ where: { name: username }, select: { @@ -383,6 +413,7 @@ app.get( }); if (!channel) { + recordChatConnectionRejected(authMethod === 'unknown' ? 'channel_not_found' : authMethod); ws.close(); return; } @@ -443,6 +474,10 @@ app.get( socketState.personalChannel = personalChannel; socketState.viewerId = socket.viewerId; socketState.isModerator = isModerator; + socket.metricsTracked = true; + socketState.metricsTracked = true; + + recordChatConnectionAccepted(authMethod); socket.send( JSON.stringify({ @@ -486,6 +521,11 @@ app.get( if (process.env.NODE_ENV !== 'production') console.log('client disconnected'); if (!socketState.targetUsername) return; + if (socketState.metricsTracked) { + recordChatDisconnect(); + socketState.metricsTracked = false; + } + const streamInfo = await prisma.streamInfo.findUnique({ where: { username: socketState.targetUsername, @@ -500,10 +540,17 @@ app.get( await redis.del(`viewer:${socketState.targetUsername}:${socketState.viewerId}`); }, async onMessage(evt, ws) { + let outcome = 'ignored'; + let messageType = 'unknown'; + let stopTimer: ReturnType | null = null; + try { const socket = ws as unknown as ChatSocket; const socketState = resolveSocketState(socket); const msg = JSON.parse(evt.data.toString()) as IncomingMessage; + messageType = typeof msg.type === 'string' ? msg.type : 'unknown'; + recordIncomingChatMessage(messageType); + stopTimer = startChatMessageTimer(messageType); if (msg.type === 'ping') { await redis.setex( @@ -512,6 +559,7 @@ app.get( '1' ); socket.send(JSON.stringify({ type: 'pong' })); + outcome = 'pong'; return; } @@ -521,6 +569,7 @@ app.get( logModerationEvent, broadcastToChannel, }); + outcome = 'moderation'; return; } @@ -535,6 +584,7 @@ app.get( broadcastRestrictionStateToUser, broadcastToChannel, }); + outcome = 'moderation'; return; } @@ -571,6 +621,7 @@ app.get( ); await sendChatAccessState(socket, channelId, chatUser.id); + outcome = 'blocked'; return; } @@ -584,6 +635,8 @@ app.get( )) ) { sendModerationError(socket, 'RATE_LIMIT', 'You are sending messages too fast.'); + recordChatModerationBlock('rate_limit'); + outcome = 'rate_limited'; return; } @@ -592,6 +645,8 @@ app.get( const timeRemaining = await redis.ttl(slowModeKey); if (timeRemaining > 0) { sendModerationError(socket, 'SLOW_MODE', `Slow mode is on. Wait ${timeRemaining}s.`); + recordChatModerationBlock('slow_mode'); + outcome = 'slow_mode'; return; } await redis.setex(slowModeKey, moderationSettings.slowModeSeconds, '1'); @@ -607,6 +662,8 @@ app.get( 'MESSAGE_TOO_LONG', `Message exceeds ${moderationSettings.maxMessageLength} characters.` ); + recordChatModerationBlock('message_too_long'); + outcome = 'message_too_long'; return; } @@ -622,7 +679,9 @@ app.get( details: { blockedTerm }, }); } + recordChatModerationBlock('blocked_term'); sendModerationError(socket, 'BLOCKED_TERM', 'Message blocked by channel moderation.'); + outcome = 'blocked_term'; return; } @@ -650,6 +709,8 @@ app.get( await redis.expire(channelKey, MESSAGE_TTL); broadcastToChannel(targetUsername, socket, msgObj as unknown as Record); + recordDeliveredChatMessage(chatUser.isBot ? 'bot' : 'user'); + outcome = 'broadcast'; } if (msg.type === 'emojiMsg') { if (!socketState.chatUser) return; @@ -677,12 +738,14 @@ app.get( emojis: emojiMap, }) ); + outcome = 'emoji_lookup'; } if (msg.type === 'emojiSearch') { if (!socketState.chatUser) return; const rawSearchTerm = (msg.searchTerm as string)?.trim() ?? ''; if (!rawSearchTerm || rawSearchTerm.length > 50) { ws.send(JSON.stringify({ type: 'emojiSearchResponse', results: [] })); + outcome = 'emoji_search_empty'; return; } const searchTerm = rawSearchTerm; @@ -712,6 +775,7 @@ app.get( results: results, }) ); + outcome = 'emoji_search'; } else { ws.send( JSON.stringify({ @@ -719,10 +783,15 @@ app.get( results: [], }) ); + outcome = 'emoji_search_empty'; } } } catch (e) { + outcome = 'error'; + recordChatError('on_message'); console.error('Error processing message:', e); + } finally { + stopTimer?.({ type: messageType, outcome }); } }, })) diff --git a/apps/chat/src/metrics.ts b/apps/chat/src/metrics.ts new file mode 100644 index 0000000..c21f277 --- /dev/null +++ b/apps/chat/src/metrics.ts @@ -0,0 +1,124 @@ +import { collectDefaultMetrics, Counter, Gauge, Histogram, Registry } from 'prom-client'; + +function createMetricsStore() { + const register = new Registry(); + register.setDefaultLabels({ app: 'chat' }); + + collectDefaultMetrics({ + prefix: 'hctv_chat_', + register, + }); + + const websocketConnections = new Gauge({ + name: 'hctv_chat_websocket_connections', + help: 'Current number of active chat websocket connections.', + registers: [register], + }); + + const websocketConnectionAttempts = new Counter({ + name: 'hctv_chat_websocket_connection_attempts_total', + help: 'Total websocket connection attempts grouped by outcome and auth method.', + labelNames: ['outcome', 'auth_method'], + registers: [register], + }); + + const incomingMessages = new Counter({ + name: 'hctv_chat_incoming_messages_total', + help: 'Total inbound websocket frames grouped by message type.', + labelNames: ['type'], + registers: [register], + }); + + const messageDuration = new Histogram({ + name: 'hctv_chat_message_duration_seconds', + help: 'Chat websocket message processing time in seconds.', + labelNames: ['type', 'outcome'], + buckets: [0.0005, 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1], + registers: [register], + }); + + const deliveredMessages = new Counter({ + name: 'hctv_chat_messages_delivered_total', + help: 'Total chat messages successfully broadcast, grouped by sender type.', + labelNames: ['sender_type'], + registers: [register], + }); + + const moderationActions = new Counter({ + name: 'hctv_chat_moderation_actions_total', + help: 'Successful moderation actions performed in chat.', + labelNames: ['action'], + registers: [register], + }); + + const moderationBlocks = new Counter({ + name: 'hctv_chat_moderation_blocks_total', + help: 'Message blocks and throttling decisions grouped by reason.', + labelNames: ['reason'], + registers: [register], + }); + + const errors = new Counter({ + name: 'hctv_chat_errors_total', + help: 'Errors observed in the chat service grouped by phase.', + labelNames: ['phase'], + registers: [register], + }); + + return { + deliveredMessages, + errors, + incomingMessages, + messageDuration, + moderationActions, + moderationBlocks, + register, + websocketConnectionAttempts, + websocketConnections, + }; +} + +const globalForMetrics = globalThis as typeof globalThis & { + __hctvChatMetrics?: ReturnType; +}; + +const metrics = (globalForMetrics.__hctvChatMetrics ??= createMetricsStore()); + +export const chatMetricsRegistry = metrics.register; + +export function recordChatConnectionAccepted(authMethod: string): void { + metrics.websocketConnectionAttempts.inc({ auth_method: authMethod, outcome: 'accepted' }); + metrics.websocketConnections.inc(); +} + +export function recordChatConnectionRejected(authMethod: string): void { + metrics.websocketConnectionAttempts.inc({ auth_method: authMethod, outcome: 'rejected' }); +} + +export function recordChatDisconnect(): void { + metrics.websocketConnections.dec(); +} + +export function recordIncomingChatMessage(type: string): void { + metrics.incomingMessages.inc({ type }); +} + +export function startChatMessageTimer(type: string) { + return metrics.messageDuration.startTimer({ type }); +} + +export function recordDeliveredChatMessage(senderType: string): void { + metrics.deliveredMessages.inc({ sender_type: senderType }); +} + +export function recordChatModerationAction(action: string): void { + metrics.moderationActions.inc({ action }); +} + +export function recordChatModerationBlock(reason: string): void { + metrics.moderationBlocks.inc({ reason }); +} + +export function recordChatError(phase: string): void { + metrics.errors.inc({ phase }); +} diff --git a/apps/chat/src/types/chat.ts b/apps/chat/src/types/chat.ts index c68c184..bb9714d 100644 --- a/apps/chat/src/types/chat.ts +++ b/apps/chat/src/types/chat.ts @@ -39,6 +39,7 @@ export interface ChatSocket { personalChannel?: any; viewerId?: string; isModerator?: boolean; + metricsTracked?: boolean; raw?: | (ModifiedWebSocket & { targetUsername?: string; @@ -46,6 +47,7 @@ export interface ChatSocket { chatUser?: ChatUser | null; personalChannel?: any; isModerator?: boolean; + metricsTracked?: boolean; }) | null; } diff --git a/apps/chat/src/utils/moderation.ts b/apps/chat/src/utils/moderation.ts index 0a06821..afc6f82 100644 --- a/apps/chat/src/utils/moderation.ts +++ b/apps/chat/src/utils/moderation.ts @@ -1,4 +1,5 @@ import { ChatModerationAction, prisma } from '@hctv/db'; +import { recordChatModerationAction } from '../metrics.js'; import type { ChatModerationCommand, ChatRestrictionState, @@ -270,6 +271,7 @@ export async function handleDeleteMessageCommand( reason: 'Message deleted by moderator', details: { msgId }, }); + recordChatModerationAction('message_deleted'); deps.broadcastToChannel(context.targetUsername, socket, { type: 'messageDeleted', msgId }); } @@ -330,6 +332,7 @@ export async function handleUserRestrictionCommand( targetUserId: target.targetUserId, reason: 'User unbanned in chat', }); + recordChatModerationAction('user_unbanned'); await deps.broadcastRestrictionStateToUser( context.targetUsername, @@ -389,6 +392,7 @@ export async function handleUserRestrictionCommand( reason, details: durationSeconds ? { durationSeconds } : undefined, }); + recordChatModerationAction(msg.type === 'mod:timeoutUser' ? 'user_timeout' : 'user_banned'); await deps.broadcastRestrictionStateToUser( context.targetUsername, diff --git a/apps/web/package.json b/apps/web/package.json index 46d9480..306c7f1 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -60,6 +60,7 @@ "nuqs": "^2.4.3", "pg": "^8.14.1", "pg-boss": "^10.1.6", + "prom-client": "^15.1.3", "react": "^19.2.3", "react-day-picker": "^9.13.0", "react-dom": "^19.2.3", diff --git a/apps/web/src/app/(ui)/(protected)/api/mediamtx/publish/route.ts b/apps/web/src/app/(ui)/(protected)/api/mediamtx/publish/route.ts index 4a62430..7defa24 100644 --- a/apps/web/src/app/(ui)/(protected)/api/mediamtx/publish/route.ts +++ b/apps/web/src/app/(ui)/(protected)/api/mediamtx/publish/route.ts @@ -1,29 +1,39 @@ import { prisma, getRedisConnection } from '@hctv/db'; +import { recordMediamtxAuth } from '@/lib/metrics'; import { NextRequest } from 'next/server'; import { z } from 'zod'; export async function POST(request: NextRequest) { + const startedAt = performance.now(); + let action = 'invalid'; + let protocol = 'invalid'; + + const finish = (body: string, status: number, outcome: string) => { + recordMediamtxAuth(action, protocol, outcome, (performance.now() - startedAt) / 1000); + return new Response(body, { status }); + }; + const redis = getRedisConnection(); const body = await request.json(); if (process.env.NODE_ENV !== 'production') { - console.log( - 'Mediamtx publish auth request:', - JSON.stringify(body, null, 2) - ); - }; + console.log('Mediamtx publish auth request:', JSON.stringify(body, null, 2)); + } const parsed = schema.safeParse(body); if (!parsed.success) { - return new Response('invalid request', { status: 400 }); + return finish('invalid request', 400, 'invalid_request'); } - const { action, protocol, path, password } = parsed.data; - if (action === 'publish' && protocol === 'srt') { + const { action: parsedAction, protocol: parsedProtocol, path, password } = parsed.data; + action = parsedAction; + protocol = parsedProtocol; + + if (parsedAction === 'publish' && parsedProtocol === 'srt') { const channelKey = await redis.get(`streamKey:${path}`); if (channelKey) { if (channelKey !== password) { - return new Response('invalid stream key', { status: 403 }); + return finish('invalid stream key', 403, 'invalid_stream_key'); } const channel = await prisma.channel.findUnique({ @@ -38,39 +48,39 @@ export async function POST(request: NextRequest) { }); if (channel?.restriction) { - const isExpired = channel.restriction.expiresAt && - new Date(channel.restriction.expiresAt) < new Date(); + const isExpired = + channel.restriction.expiresAt && new Date(channel.restriction.expiresAt) < new Date(); if (!isExpired) { - return new Response('channel restricted', { status: 403 }); + return finish('channel restricted', 403, 'channel_restricted'); } } if (channel?.owner?.ban) { - const isExpired = channel.owner.ban.expiresAt && - new Date(channel.owner.ban.expiresAt) < new Date(); + const isExpired = + channel.owner.ban.expiresAt && new Date(channel.owner.ban.expiresAt) < new Date(); if (!isExpired) { - return new Response('user banned', { status: 403 }); + return finish('user banned', 403, 'user_banned'); } } if (channel?.streamInfo[0].isLive) { - return new Response('stream already live', { status: 403 }); + return finish('stream already live', 403, 'stream_already_live'); } - return new Response('youre in yay', { status: 200 }); + return finish('youre in yay', 200, 'authorized_publish'); } - } else if (action === 'read' && protocol === 'hls') { + } else if (parsedAction === 'read' && parsedProtocol === 'hls') { if (password === process.env.MEDIAMTX_PUBLISH_KEY) { - return new Response('authorized (hls read key for thumbs)', { status: 200 }); + return finish('authorized (hls read key for thumbs)', 200, 'authorized_thumbnail'); } const sessionExists = await redis.exists(`sessions:${password}`); if (!sessionExists) { - return new Response('unauthorized', { status: 401 }); + return finish('unauthorized', 401, 'unauthorized_session'); } - return new Response('authorized', { status: 200 }); + return finish('authorized', 200, 'authorized_read'); } - return new Response('uhh', { status: 401 }); + return finish('uhh', 401, 'unauthorized'); } const schema = z.object({ diff --git a/apps/web/src/app/api/metrics/route.ts b/apps/web/src/app/api/metrics/route.ts new file mode 100644 index 0000000..5510a74 --- /dev/null +++ b/apps/web/src/app/api/metrics/route.ts @@ -0,0 +1,13 @@ +import { webMetricsRegistry } from '@/lib/metrics'; + +export const runtime = 'nodejs'; +export const dynamic = 'force-dynamic'; + +export async function GET() { + return new Response(await webMetricsRegistry.metrics(), { + headers: { + 'Content-Type': webMetricsRegistry.contentType, + 'Cache-Control': 'no-store, no-cache, must-revalidate, proxy-revalidate', + }, + }); +} diff --git a/apps/web/src/lib/instrumentation/getLiveThumb.ts b/apps/web/src/lib/instrumentation/getLiveThumb.ts index 47d9a8e..a354b7f 100644 --- a/apps/web/src/lib/instrumentation/getLiveThumb.ts +++ b/apps/web/src/lib/instrumentation/getLiveThumb.ts @@ -1,23 +1,28 @@ -import { prisma } from "@hctv/db"; -import { getThumbnailQueue } from "../workers"; +import { prisma } from '@hctv/db'; +import { recordThumbnailJobsEnqueued, trackWebJob } from '../metrics'; +import { getThumbnailQueue } from '../workers'; export default async function getLiveThumb() { - const liveChannels = await prisma.streamInfo.findMany({ - where: { - isLive: true, - }, - include: { - channel: true, - } - }); - const liveChannelNames = liveChannels.map((channel) => channel.channel.name); - - const thumbQueue = getThumbnailQueue(); - for (const channel of liveChannelNames) { - const lc = liveChannels.find(c => c.channel.name === channel)!; - await thumbQueue.add("getLiveThumb", { - name: channel, - server: lc.streamRegion, + return trackWebJob('thumbnail_refresh', async () => { + const liveChannels = await prisma.streamInfo.findMany({ + where: { + isLive: true, + }, + include: { + channel: true, + }, }); - } -} \ No newline at end of file + const thumbQueue = getThumbnailQueue(); + const jobsByRegion: Record = {}; + + for (const liveChannel of liveChannels) { + await thumbQueue.add('getLiveThumb', { + name: liveChannel.channel.name, + server: liveChannel.streamRegion, + }); + jobsByRegion[liveChannel.streamRegion] = (jobsByRegion[liveChannel.streamRegion] ?? 0) + 1; + } + + recordThumbnailJobsEnqueued(jobsByRegion); + }); +} diff --git a/apps/web/src/lib/instrumentation/streamInfo.ts b/apps/web/src/lib/instrumentation/streamInfo.ts index b5d76a0..c3aa292 100644 --- a/apps/web/src/lib/instrumentation/streamInfo.ts +++ b/apps/web/src/lib/instrumentation/streamInfo.ts @@ -1,4 +1,5 @@ import { prisma } from '@hctv/db'; +import { setLiveStreamsByRegion, trackWebJob } from '../metrics'; import { HttpFlv } from '../types/liveBackendJson'; import { getNotificationQueue } from '../workers'; import client from '../services/slackNotifier'; @@ -50,103 +51,107 @@ export async function initializeStreamInfo(channelId?: string) { export async function syncStream() { try { - const regions = Object.keys(MEDIAMTX_SERVER_REGIONS) as Array< - keyof typeof MEDIAMTX_SERVER_REGIONS - >; + await trackWebJob('stream_sync', async () => { + const regions = Object.keys(MEDIAMTX_SERVER_REGIONS) as Array< + keyof typeof MEDIAMTX_SERVER_REGIONS + >; - const allActiveStreams = new Map(); + const allActiveStreams = new Map(); + const liveStreamsByRegion = Object.fromEntries(regions.map((region) => [region, 0])); - for (const r of regions) { - const region = MEDIAMTX_SERVER_REGIONS[r]; - const response = await fetch(`${region.apiUrl}/v3/paths/list?itemsPerPage=1000`); + for (const r of regions) { + const region = MEDIAMTX_SERVER_REGIONS[r]; + const response = await fetch(`${region.apiUrl}/v3/paths/list?itemsPerPage=1000`); - if (!response.ok) { - console.error( - `Failed to fetch ${r} stream stats: ${response.status} ${response.statusText}` - ); - continue; - } + if (!response.ok) { + console.error( + `Failed to fetch ${r} stream stats: ${response.status} ${response.statusText}` + ); + continue; + } - type ResponseType = - paths['/v3/paths/list']['get']['responses']['200']['content']['application/json']; - const data = (await response.json()) as ResponseType; + type ResponseType = + paths['/v3/paths/list']['get']['responses']['200']['content']['application/json']; + const data = (await response.json()) as ResponseType; - if (data?.items) { - for (const stream of data.items) { - if (stream.ready && stream.name) { - allActiveStreams.set(stream.name, r); + if (data?.items) { + for (const stream of data.items) { + if (stream.ready && stream.name) { + allActiveStreams.set(stream.name, r); + liveStreamsByRegion[r] += 1; + } } } } - } - // handle streams going offline - const currentLiveStreams = await prisma.streamInfo.findMany({ - where: { isLive: true }, - }); + setLiveStreamsByRegion(liveStreamsByRegion); - for (const dbStream of currentLiveStreams) { - if (!allActiveStreams.has(dbStream.username)) { - await prisma.streamInfo.update({ - where: { username: dbStream.username }, - data: { - isLive: false, - viewers: 0, - startedAt: new Date(0), - }, - }); - } - } - - // handle streams going online - for (const [username, regionKey] of allActiveStreams) { - const existingStream = await prisma.streamInfo.findUnique({ - where: { username }, - include: { channel: true }, + const currentLiveStreams = await prisma.streamInfo.findMany({ + where: { isLive: true }, }); - if (existingStream && !existingStream.isLive) { - console.log(`Stream ${username} is now live in region ${regionKey}`); - await prisma.streamInfo.update({ - where: { username }, - data: { - isLive: true, - startedAt: new Date(), - streamRegion: regionKey, - }, - }); - - const subscribedFollowers = await prisma.follow.findMany({ - where: { - channelId: existingStream.channelId, - notifyStream: true, - }, - include: { - user: true, - }, - }); - - const queue = getNotificationQueue(); - - if (!existingStream.channel.is247) { - queue.add(`streamStartChannel:${existingStream.username}`, { - text: `${existingStream.username} is now *live*, streaming *${existingStream.title}* (${existingStream.category})!\n`, - channel: process.env.NOTIFICATION_CHANNEL_ID!, - unfurl_links: true, + for (const dbStream of currentLiveStreams) { + if (!allActiveStreams.has(dbStream.username)) { + await prisma.streamInfo.update({ + where: { username: dbStream.username }, + data: { + isLive: false, + viewers: 0, + startedAt: new Date(0), + }, }); } + } - if (existingStream.enableNotifications && !existingStream.channel.is247) { - for (const follower of subscribedFollowers) { - queue.add(`streamStartDm:${follower.user.id}`, { - text: `${existingStream.username} is now *live*, streaming *${existingStream.title}* (${existingStream.category})!\n\n_Stream notifications are enabled for this user. If you want to disable them, you can do so in \`Profile > Follows\`._`, - channel: follower.user.slack_id, + for (const [username, regionKey] of allActiveStreams) { + const existingStream = await prisma.streamInfo.findUnique({ + where: { username }, + include: { channel: true }, + }); + + if (existingStream && !existingStream.isLive) { + console.log(`Stream ${username} is now live in region ${regionKey}`); + await prisma.streamInfo.update({ + where: { username }, + data: { + isLive: true, + startedAt: new Date(), + streamRegion: regionKey, + }, + }); + + const subscribedFollowers = await prisma.follow.findMany({ + where: { + channelId: existingStream.channelId, + notifyStream: true, + }, + include: { + user: true, + }, + }); + + const queue = getNotificationQueue(); + + if (!existingStream.channel.is247) { + queue.add(`streamStartChannel:${existingStream.username}`, { + text: `${existingStream.username} is now *live*, streaming *${existingStream.title}* (${existingStream.category})!\n`, + channel: process.env.NOTIFICATION_CHANNEL_ID!, unfurl_links: true, }); } + + if (existingStream.enableNotifications && !existingStream.channel.is247) { + for (const follower of subscribedFollowers) { + queue.add(`streamStartDm:${follower.user.id}`, { + text: `${existingStream.username} is now *live*, streaming *${existingStream.title}* (${existingStream.category})!\n\n_Stream notifications are enabled for this user. If you want to disable them, you can do so in \`Profile > Follows\`._`, + channel: follower.user.slack_id, + unfurl_links: true, + }); + } + } } } - } + }); } catch (error) { console.error('Error syncing stream status:', error); } diff --git a/apps/web/src/lib/instrumentation/syncStreamKeys.ts b/apps/web/src/lib/instrumentation/syncStreamKeys.ts index c42b8be..45b6e17 100644 --- a/apps/web/src/lib/instrumentation/syncStreamKeys.ts +++ b/apps/web/src/lib/instrumentation/syncStreamKeys.ts @@ -1,30 +1,33 @@ import { prisma, getRedisConnection } from '@hctv/db'; +import { trackWebJob } from '../metrics'; export default async function syncStreamKeys() { console.log('Syncing stream keys to Redis...'); try { - const keys = await prisma.streamKey.findMany({ - include: { - channel: true, - }, - }); + await trackWebJob('sync_stream_keys', async () => { + const keys = await prisma.streamKey.findMany({ + include: { + channel: true, + }, + }); - if (keys.length === 0) { - console.log('No stream keys found to sync.'); - return; - } - - const redis = getRedisConnection(); - const pipeline = redis.pipeline(); - - for (const key of keys) { - if (key.channel && key.channel.name) { - pipeline.set(`streamKey:${key.channel.name}`, key.key); + if (keys.length === 0) { + console.log('No stream keys found to sync.'); + return; } - } - await pipeline.exec(); - console.log(`Synced ${keys.length} stream keys to Redis`); + const redis = getRedisConnection(); + const pipeline = redis.pipeline(); + + for (const key of keys) { + if (key.channel && key.channel.name) { + pipeline.set(`streamKey:${key.channel.name}`, key.key); + } + } + + await pipeline.exec(); + console.log(`Synced ${keys.length} stream keys to Redis`); + }); } catch (error) { console.error('Failed to sync stream keys to Redis:', error); } diff --git a/apps/web/src/lib/instrumentation/viewerCountSync.ts b/apps/web/src/lib/instrumentation/viewerCountSync.ts index 4755f87..28f0939 100644 --- a/apps/web/src/lib/instrumentation/viewerCountSync.ts +++ b/apps/web/src/lib/instrumentation/viewerCountSync.ts @@ -1,40 +1,51 @@ -import { getRedisConnection, prisma } from "@hctv/db"; +import { getRedisConnection, prisma } from '@hctv/db'; +import { setViewerSnapshot, trackWebJob } from '../metrics'; export async function viewerCountSync() { - const streams = await prisma.streamInfo.findMany({ - where: { - isLive: true - }, - include: { - channel: true - } - }) - - if (streams.length === 0) { - return; - } - - const redis = getRedisConnection(); - const multi = redis.multi(); - for (const stream of streams) { - multi.keys(`viewer:${stream.channel.name}:*`); - } - const results = await multi.exec(); - - await prisma.$transaction(async (tx) => { - const updates = results?.map((res, index) => { - const count = Array.isArray(res[1]) ? res[1].length : 0; - const stream = streams[index]; - return tx.streamInfo.update({ + try { + await trackWebJob('viewer_count_sync', async () => { + const streams = await prisma.streamInfo.findMany({ where: { - // using username here because it uses a map - username: stream.username + isLive: true, }, - data: { - viewers: count - } - }) - }) - await Promise.all(updates || []); - }) -} \ No newline at end of file + include: { + channel: true, + }, + }); + + if (streams.length === 0) { + setViewerSnapshot(0, 0); + return; + } + + const redis = getRedisConnection(); + const multi = redis.multi(); + for (const stream of streams) { + multi.keys(`viewer:${stream.channel.name}:*`); + } + const results = await multi.exec(); + let totalViewers = 0; + + await prisma.$transaction(async (tx) => { + const updates = results?.map((res, index) => { + const count = Array.isArray(res[1]) ? res[1].length : 0; + totalViewers += count; + const stream = streams[index]; + return tx.streamInfo.update({ + where: { + username: stream.username, + }, + data: { + viewers: count, + }, + }); + }); + await Promise.all(updates || []); + }); + + setViewerSnapshot(totalViewers, streams.length); + }); + } catch (error) { + console.error('Error syncing viewer counts:', error); + } +} diff --git a/apps/web/src/lib/instrumentation/writeSessions.ts b/apps/web/src/lib/instrumentation/writeSessions.ts index 6df2377..f31bfb6 100644 --- a/apps/web/src/lib/instrumentation/writeSessions.ts +++ b/apps/web/src/lib/instrumentation/writeSessions.ts @@ -1,17 +1,19 @@ -import { prisma } from "@hctv/db"; -import { getRedisConnection } from "@hctv/db"; +import { getRedisConnection, prisma } from '@hctv/db'; +import { trackWebJob } from '../metrics'; export default async function writeSessions() { - const sessions = await prisma.session.findMany(); - const sessionIds = sessions.map((session) => session.id); - - const redis = getRedisConnection(); - const multi = redis.multi(); - multi.del('sessions:*') - for (const sessionId of sessionIds) { - multi.set(`sessions:${sessionId}`, ''); - } - await multi.exec(); + return trackWebJob('write_sessions', async () => { + const sessions = await prisma.session.findMany(); + const sessionIds = sessions.map((session) => session.id); - console.log("Sessions written to Redis"); -} \ No newline at end of file + const redis = getRedisConnection(); + const multi = redis.multi(); + multi.del('sessions:*'); + for (const sessionId of sessionIds) { + multi.set(`sessions:${sessionId}`, ''); + } + await multi.exec(); + + console.log('Sessions written to Redis'); + }); +} diff --git a/apps/web/src/lib/metrics.ts b/apps/web/src/lib/metrics.ts new file mode 100644 index 0000000..797e69d --- /dev/null +++ b/apps/web/src/lib/metrics.ts @@ -0,0 +1,133 @@ +import { collectDefaultMetrics, Counter, Gauge, Histogram, Registry } from 'prom-client'; + +function createMetricsStore() { + const register = new Registry(); + register.setDefaultLabels({ app: 'web' }); + + collectDefaultMetrics({ + prefix: 'hctv_web_', + register, + }); + + const backgroundJobRuns = new Counter({ + name: 'hctv_web_background_job_runs_total', + help: 'Total number of background jobs run by the web app.', + labelNames: ['job', 'status'], + registers: [register], + }); + + const backgroundJobDuration = new Histogram({ + name: 'hctv_web_background_job_duration_seconds', + help: 'Background job execution time in seconds.', + labelNames: ['job', 'status'], + buckets: [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30], + registers: [register], + }); + + const liveStreams = new Gauge({ + name: 'hctv_web_live_streams', + help: 'Current number of live streams grouped by MediaMTX region.', + labelNames: ['region'], + registers: [register], + }); + + const activeViewers = new Gauge({ + name: 'hctv_web_active_viewers', + help: 'Current number of active viewers across all live streams.', + registers: [register], + }); + + const viewerCountTrackedStreams = new Gauge({ + name: 'hctv_web_viewer_count_tracked_streams', + help: 'Number of live streams included in the latest viewer sync.', + registers: [register], + }); + + const thumbnailJobsEnqueued = new Counter({ + name: 'hctv_web_thumbnail_jobs_enqueued_total', + help: 'Total thumbnail refresh jobs enqueued by region.', + labelNames: ['region'], + registers: [register], + }); + + const mediamtxAuthRequests = new Counter({ + name: 'hctv_web_mediamtx_auth_requests_total', + help: 'Total MediaMTX auth decisions handled by the web app.', + labelNames: ['action', 'protocol', 'outcome'], + registers: [register], + }); + + const mediamtxAuthDuration = new Histogram({ + name: 'hctv_web_mediamtx_auth_duration_seconds', + help: 'MediaMTX auth request duration in seconds.', + labelNames: ['action', 'protocol', 'outcome'], + buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5], + registers: [register], + }); + + return { + register, + activeViewers, + backgroundJobDuration, + backgroundJobRuns, + liveStreams, + mediamtxAuthDuration, + mediamtxAuthRequests, + thumbnailJobsEnqueued, + viewerCountTrackedStreams, + }; +} + +const globalForMetrics = globalThis as typeof globalThis & { + __hctvWebMetrics?: ReturnType; +}; + +const metrics = (globalForMetrics.__hctvWebMetrics ??= createMetricsStore()); + +export const webMetricsRegistry = metrics.register; + +export async function trackWebJob(job: string, fn: () => Promise): Promise { + const stopTimer = metrics.backgroundJobDuration.startTimer({ job }); + let status = 'success'; + + try { + return await fn(); + } catch (error) { + status = 'error'; + throw error; + } finally { + metrics.backgroundJobRuns.inc({ job, status }); + stopTimer({ job, status }); + } +} + +export function setLiveStreamsByRegion(streamsByRegion: Record): void { + metrics.liveStreams.reset(); + + for (const [region, count] of Object.entries(streamsByRegion)) { + metrics.liveStreams.set({ region }, count); + } +} + +export function setViewerSnapshot(totalViewers: number, trackedStreams: number): void { + metrics.activeViewers.set(totalViewers); + metrics.viewerCountTrackedStreams.set(trackedStreams); +} + +export function recordThumbnailJobsEnqueued(jobsByRegion: Record): void { + for (const [region, count] of Object.entries(jobsByRegion)) { + if (count > 0) { + metrics.thumbnailJobsEnqueued.inc({ region }, count); + } + } +} + +export function recordMediamtxAuth( + action: string, + protocol: string, + outcome: string, + durationSeconds: number +): void { + metrics.mediamtxAuthRequests.inc({ action, protocol, outcome }); + metrics.mediamtxAuthDuration.observe({ action, protocol, outcome }, durationSeconds); +} diff --git a/apps/web/src/lib/workers/register.ts b/apps/web/src/lib/workers/register.ts index 7df4438..c0de7ff 100644 --- a/apps/web/src/lib/workers/register.ts +++ b/apps/web/src/lib/workers/register.ts @@ -1,8 +1,11 @@ import { registerNotificationWorker } from './worker/notification'; import { registerThumbnailWorker } from './worker/thumbnails'; +import { trackWebJob } from '../metrics'; export async function registerWorkers(): Promise { - await registerNotificationWorker(); - await registerThumbnailWorker(); - console.log('All workers registered successfully'); -} \ No newline at end of file + await trackWebJob('register_workers', async () => { + await registerNotificationWorker(); + await registerThumbnailWorker(); + console.log('All workers registered successfully'); + }); +} diff --git a/compose.yml b/compose.yml index 142397b..0818668 100644 --- a/compose.yml +++ b/compose.yml @@ -63,3 +63,41 @@ services: dockerfile: docker/mediamtx/Dockerfile ports: - '8890:8890/udp' + postgres-exporter: + image: 'prometheuscommunity/postgres-exporter:v0.17.1' + environment: + DATA_SOURCE_NAME: 'postgresql://postgres:${PG_PASS}@postgres:5432/hctv?sslmode=disable' + redis-exporter: + image: 'oliver006/redis_exporter:v1.67.0' + environment: + REDIS_ADDR: 'redis://redis:6379' + prometheus: + image: 'prom/prometheus:v3.4.2' + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.enable-lifecycle' + volumes: + - './observability/prometheus.yml:/etc/prometheus/prometheus.yml:ro' + - 'prometheus_data:/prometheus' + extra_hosts: + - 'host.docker.internal:host-gateway' + grafana: + image: 'grafana/grafana:11.6.0' + depends_on: + - prometheus + environment: + GF_SECURITY_ADMIN_USER: '${GRAFANA_ADMIN_USER:-admin}' + GF_SECURITY_ADMIN_PASSWORD: '${GRAFANA_ADMIN_PASSWORD:-admin}' + GF_USERS_DEFAULT_THEME: light + volumes: + - './observability/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro' + - './observability/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro' + - './observability/grafana/dashboards:/var/lib/grafana/dashboards:ro' + - 'grafana_data:/var/lib/grafana' + +volumes: + hctv_pgdata: + hctv_redis: + prometheus_data: + grafana_data: diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml index cac25e3..87f8588 100644 --- a/dev/docker-compose.yml +++ b/dev/docker-compose.yml @@ -9,22 +9,59 @@ services: - ./psql:/var/lib/postgresql ports: - 5555:5432 + postgres-exporter: + image: prometheuscommunity/postgres-exporter:v0.17.1 + environment: + DATA_SOURCE_NAME: postgresql://postgres:skbiditoilet@psql:5432/postgres?sslmode=disable redis: image: redis:7.4-alpine volumes: - ./redis:/data ports: - 6379:6379 + redis-exporter: + image: oliver006/redis_exporter:v1.67.0 + environment: + REDIS_ADDR: redis://redis:6379 mediamtx: image: bluenviron/mediamtx:latest ports: - 8890:8890/udp - 8891:8888 - 9997:9997 + - 9998:9998 volumes: - ./mediamtx.yml:/mediamtx.yml extra_hosts: - - "host.docker.internal:host-gateway" + - 'host.docker.internal:host-gateway' + prometheus: + image: prom/prometheus:v3.4.2 + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-lifecycle + volumes: + - ../observability/prometheus.dev.yml:/etc/prometheus/prometheus.yml:ro + - prometheus_data:/prometheus + ports: + - 9090:9090 + extra_hosts: + - 'host.docker.internal:host-gateway' + grafana: + image: grafana/grafana:11.6.0 + depends_on: + - prometheus + environment: + GF_SECURITY_ADMIN_USER: admin + GF_SECURITY_ADMIN_PASSWORD: admin + GF_USERS_DEFAULT_THEME: light + volumes: + - ../observability/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro + - ../observability/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro + - ../observability/grafana/dashboards:/var/lib/grafana/dashboards:ro + - grafana_data:/var/lib/grafana + ports: + - 3001:3000 # mediamtx2: # image: bluenviron/mediamtx:latest # ports: @@ -34,4 +71,8 @@ services: # volumes: # - ./mediamtx.yml:/mediamtx.yml # extra_hosts: - # - "host.docker.internal:host-gateway" \ No newline at end of file + # - "host.docker.internal:host-gateway" + +volumes: + prometheus_data: + grafana_data: diff --git a/dev/mediamtx.yml b/dev/mediamtx.yml index ae23ca2..4a8a231 100644 --- a/dev/mediamtx.yml +++ b/dev/mediamtx.yml @@ -15,3 +15,5 @@ authMethod: http authHTTPAddress: http://host.docker.internal:3000/api/mediamtx/publish api: yes +metrics: yes +metricsAddress: :9998 diff --git a/docker/mediamtx/mediamtx.yml b/docker/mediamtx/mediamtx.yml index 51d5cb9..3a1395e 100644 --- a/docker/mediamtx/mediamtx.yml +++ b/docker/mediamtx/mediamtx.yml @@ -15,3 +15,5 @@ authMethod: http authHTTPAddress: http://hctv:3000/api/mediamtx/publish api: yes +metrics: yes +metricsAddress: :9998 diff --git a/observability/grafana/dashboards/hctv-overview.json b/observability/grafana/dashboards/hctv-overview.json new file mode 100644 index 0000000..ed6afdb --- /dev/null +++ b/observability/grafana/dashboards/hctv-overview.json @@ -0,0 +1,787 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(255, 211, 102, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "Down" + }, + "1": { + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "max(up{job=\"web\"})", + "refId": "A" + } + ], + "title": "Web", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "Down" + }, + "1": { + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "max(up{job=\"chat\"})", + "refId": "A" + } + ], + "title": "Chat", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "Down" + }, + "1": { + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "max(up{job=\"mediamtx\"})", + "refId": "A" + } + ], + "title": "MediaMTX", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "Down" + }, + "1": { + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "max(pg_up)", + "refId": "A" + } + ], + "title": "Postgres", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "Down" + }, + "1": { + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "max(redis_up)", + "refId": "A" + } + ], + "title": "Redis", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 6, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "sum by (region) (hctv_web_live_streams)", + "legendFormat": "{{region}}", + "refId": "A" + } + ], + "title": "Live Streams by Region", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 7, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "hctv_web_active_viewers", + "legendFormat": "active viewers", + "refId": "A" + }, + { + "expr": "hctv_chat_websocket_connections", + "legendFormat": "chat sockets", + "refId": "B" + } + ], + "title": "Audience Pulse", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 8, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "sum(rate(hctv_chat_messages_delivered_total[5m]))", + "legendFormat": "delivered messages", + "refId": "A" + }, + { + "expr": "sum(rate(hctv_chat_incoming_messages_total[5m]))", + "legendFormat": "all inbound frames", + "refId": "B" + } + ], + "title": "Chat Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 9, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "sum by (action) (rate(hctv_chat_moderation_actions_total[15m]))", + "legendFormat": "{{action}}", + "refId": "A" + }, + { + "expr": "sum by (reason) (rate(hctv_chat_moderation_blocks_total[15m]))", + "legendFormat": "blocked: {{reason}}", + "refId": "B" + } + ], + "title": "Moderation Load", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 10, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "sum by (outcome) (rate(hctv_web_mediamtx_auth_requests_total[5m]))", + "legendFormat": "{{outcome}}", + "refId": "A" + } + ], + "title": "Media Auth Outcomes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 11, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum by (le, job) (rate(hctv_web_background_job_duration_seconds_bucket[15m])))", + "legendFormat": "{{job}} p95", + "refId": "A" + } + ], + "title": "Background Job Latency P95", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 12, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "pg_stat_database_numbackends{datname=~\"hctv|postgres\"}", + "legendFormat": "database connections", + "refId": "A" + }, + { + "expr": "rate(pg_stat_database_xact_commit{datname=~\"hctv|postgres\"}[5m])", + "legendFormat": "commits / sec", + "refId": "B" + } + ], + "title": "Postgres Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 13, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "redis_memory_used_bytes", + "legendFormat": "redis memory", + "refId": "A" + }, + { + "expr": "redis_connected_clients", + "legendFormat": "connected clients", + "refId": "B" + } + ], + "title": "Redis Footprint", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 14, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "expr": "hctv_web_process_resident_memory_bytes", + "legendFormat": "web memory", + "refId": "A" + }, + { + "expr": "hctv_chat_process_resident_memory_bytes", + "legendFormat": "chat memory", + "refId": "B" + }, + { + "expr": "process_resident_memory_bytes{job=\"mediamtx\"}", + "legendFormat": "mediamtx memory", + "refId": "C" + } + ], + "title": "Service Memory", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": ["hackclub.tv", "observability"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "HackClub.tv Overview", + "uid": "hctv-overview", + "version": 1, + "weekStart": "" +} diff --git a/observability/grafana/provisioning/dashboards/dashboard.yml b/observability/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..c98c165 --- /dev/null +++ b/observability/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +providers: + - name: HackClubTV + folder: HackClub TV + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards diff --git a/observability/grafana/provisioning/datasources/prometheus.yml b/observability/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..00f9915 --- /dev/null +++ b/observability/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false diff --git a/observability/prometheus.dev.yml b/observability/prometheus.dev.yml new file mode 100644 index 0000000..b3953af --- /dev/null +++ b/observability/prometheus.dev.yml @@ -0,0 +1,39 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: web + metrics_path: /api/metrics + static_configs: + - targets: + - host.docker.internal:3000 + + - job_name: chat + metrics_path: /metrics + static_configs: + - targets: + - host.docker.internal:8000 + + - job_name: mediamtx + metrics_path: /metrics + static_configs: + - targets: + - mediamtx:9998 + + - job_name: redis + metrics_path: /metrics + static_configs: + - targets: + - redis-exporter:9121 + + - job_name: postgres + metrics_path: /metrics + static_configs: + - targets: + - postgres-exporter:9187 + + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 diff --git a/observability/prometheus.yml b/observability/prometheus.yml new file mode 100644 index 0000000..a221c0f --- /dev/null +++ b/observability/prometheus.yml @@ -0,0 +1,39 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: web + metrics_path: /api/metrics + static_configs: + - targets: + - hctv:3000 + + - job_name: chat + metrics_path: /metrics + static_configs: + - targets: + - chat:8000 + + - job_name: mediamtx + metrics_path: /metrics + static_configs: + - targets: + - mediamtx:9998 + + - job_name: redis + metrics_path: /metrics + static_configs: + - targets: + - redis-exporter:9121 + + - job_name: postgres + metrics_path: /metrics + static_configs: + - targets: + - postgres-exporter:9187 + + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5c2a840..0fda63e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -41,6 +41,9 @@ importers: hono: specifier: ^4.7.5 version: 4.11.3 + prom-client: + specifier: ^15.1.3 + version: 15.1.3 devDependencies: '@types/node': specifier: ^20.11.17 @@ -219,6 +222,9 @@ importers: pg-boss: specifier: ^10.1.6 version: 10.4.0 + prom-client: + specifier: ^15.1.3 + version: 15.1.3 react: specifier: ^19.2.3 version: 19.2.3 @@ -3890,6 +3896,9 @@ packages: resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==} engines: {node: '>=8'} + bintrees@1.0.2: + resolution: {integrity: sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==} + bl@5.1.0: resolution: {integrity: sha512-tv1ZJHLfTDnXE6tMHv73YgSJaWR2AFuPwMntBe7XL/GBFHnT0CLnsHMogfk5+GzCDC5ZWarSCYaIGATZt9dNsQ==} @@ -6683,6 +6692,10 @@ packages: resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==} engines: {node: '>=0.4.0'} + prom-client@15.1.3: + resolution: {integrity: sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==} + engines: {node: ^16 || ^18 || >=20} + prompts@2.4.2: resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==} engines: {node: '>= 6'} @@ -7351,6 +7364,9 @@ packages: resolution: {integrity: sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==} engines: {node: '>=6'} + tdigest@0.1.2: + resolution: {integrity: sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==} + terser-webpack-plugin@5.3.16: resolution: {integrity: sha512-h9oBFCWrq78NyWWVcSwZarJkZ01c2AyGrzs1crmHZO3QUg9D61Wu4NPjBy69n7JqylFF5y+CsUZYmYEIZ3mR+Q==} engines: {node: '>= 10.13.0'} @@ -12331,6 +12347,8 @@ snapshots: binary-extensions@2.3.0: {} + bintrees@1.0.2: {} + bl@5.1.0: dependencies: buffer: 6.0.3 @@ -15871,6 +15889,11 @@ snapshots: progress@2.0.3: {} + prom-client@15.1.3: + dependencies: + '@opentelemetry/api': 1.9.0 + tdigest: 0.1.2 + prompts@2.4.2: dependencies: kleur: 3.0.3 @@ -16813,6 +16836,10 @@ snapshots: tapable@2.3.0: {} + tdigest@0.1.2: + dependencies: + bintrees: 1.0.2 + terser-webpack-plugin@5.3.16(webpack@5.104.1): dependencies: '@jridgewell/trace-mapping': 0.3.31