2026-05-14 19:16:46 +07:00
|
|
|
import { config } from "../config";
|
2026-05-14 19:16:46 +07:00
|
|
|
import { createChildLogger } from "../logger";
|
|
|
|
|
import { retryWithBackoff } from "../retry";
|
2026-05-14 21:16:03 +07:00
|
|
|
import type { AnalysisResult, MessageRecord } from "./types";
|
2026-05-14 19:16:46 +07:00
|
|
|
|
|
|
|
|
// Module-scoped child logger, created with the name "llmModerationClient".
const log = createChildLogger("llmModerationClient");
|
|
|
|
|
|
|
|
|
|
/**
 * One entry of the model's `results` array, as it appears in the decoded JSON.
 *
 * The declared types describe what the prompt asks the model to produce. The
 * values come from untrusted model output, so nothing here is guaranteed at
 * runtime; `parseModerationResponse` checks or coerces each field before it
 * builds an `AnalysisResult`.
 */
interface RawModerationResult {
  /** ID of the analyzed message; must match one of the requested target IDs. */
  message_id: string;
  /** Expected to be "clean", "warn" or "flagged"; any other value is rejected by the parser. */
  status: string;
  /** Violation flags; the parser coerces an array or a single truthy value to `string[]`. */
  flags: unknown;
  /** Score requested in the range 0 to 1; the parser clamps it to that range. */
  score: number;
  /** Free-text explanation; the parser substitutes "" when it is falsy. */
  analysis: string;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Top-level JSON object the model is asked to return: a single `results`
 * array with one entry per analyzed message.
 */
interface RawModerationResponse {
  /** Per-message verdicts, still unvalidated at this point. */
  results: RawModerationResult[];
}
|
|
|
|
|
|
|
|
|
|
/** Status values a moderation result may carry. */
const MODERATION_STATUSES = ["clean", "warn", "flagged"] as const;

type ModerationStatus = (typeof MODERATION_STATUSES)[number];

/** Type guard: true when `value` is one of the accepted status strings. */
function isModerationStatus(value: unknown): value is ModerationStatus {
  return (MODERATION_STATUSES as readonly unknown[]).includes(value);
}

/**
 * Extracts the JSON value that starts at the first '{' in `content`.
 *
 * Candidates are tried from the last '}' backwards, so the longest parseable
 * span wins and trailing non-JSON text after the object is ignored.
 */
function extractJsonObject(content: string): unknown {
  const startIdx = content.indexOf("{");
  const endIdx = content.lastIndexOf("}");

  if (startIdx === -1 || endIdx === -1 || endIdx < startIdx) {
    throw new Error("No JSON object found in response");
  }

  let lastError: Error | undefined;
  for (let end = endIdx; end > startIdx; end--) {
    if (content[end] !== "}") {
      continue;
    }
    try {
      return JSON.parse(content.substring(startIdx, end + 1));
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
    }
  }

  throw new Error(`Failed to parse JSON: ${lastError?.message ?? "unknown error"}`);
}

/**
 * Validates a raw score and clamps it to the range 0 to 1.
 *
 * Only numbers and non-blank numeric strings are accepted. Passing the value
 * straight to `Number()` would turn "", whitespace, `false` and `[]` into 0
 * (and `true` into 1), silently inventing a score.
 */
function normalizeScore(score: unknown): number {
  if (score === null || score === undefined) {
    throw new Error("Invalid score: must not be null or undefined");
  }

  const isNonBlankString = typeof score === "string" && score.trim() !== "";
  if (typeof score !== "number" && !isNonBlankString) {
    throw new Error(`Invalid score: ${String(score)}. Must be a finite number`);
  }

  const numeric = Number(score);
  if (!Number.isFinite(numeric)) {
    throw new Error(`Invalid score: ${String(score)}. Must be a finite number`);
  }

  return Math.max(0, Math.min(1, numeric));
}

/** Coerces raw flags to `string[]`: arrays element-wise, a single truthy value to a one-element array. */
function normalizeFlags(flags: unknown): string[] {
  if (Array.isArray(flags)) {
    return flags.map((flag) => String(flag));
  }
  return flags ? [String(flags)] : [];
}

/**
 * Parses an LLM moderation response and validates it against the target IDs.
 *
 * The JSON object is located by scanning from the first '{' and attempting
 * `JSON.parse` at each candidate closing brace, last one first. Every entry
 * of `results` is then checked and converted to an `AnalysisResult`.
 *
 * @param content - Raw text returned by the model; may contain text around the JSON.
 * @param targetIds - IDs of the messages that were sent for analysis.
 * @returns One `AnalysisResult` per entry, in the order the model returned them.
 * @throws Error when no JSON object can be extracted; when `results` is missing
 *   or not an array; when an entry is not an object; when a `message_id` is
 *   missing, unknown or duplicated; when `status` or `score` is invalid; or
 *   when any target ID has no result.
 */
export function parseModerationResponse(
  content: string,
  targetIds: string[],
): AnalysisResult[] {
  const parsed = extractJsonObject(content);

  // Validate structure
  if (!parsed || typeof parsed !== "object" || !("results" in parsed)) {
    throw new Error("Response missing 'results' array");
  }

  const response = parsed as RawModerationResponse;
  if (!Array.isArray(response.results)) {
    throw new Error("'results' must be an array");
  }

  // The static element type is only a claim about model output; treat each
  // entry as unknown and validate it before use.
  const entries: unknown[] = response.results;

  // Track which target IDs were found
  const foundIds = new Set<string>();
  const targetIdSet = new Set(targetIds);

  const results: AnalysisResult[] = entries.map((entry) => {
    // Destructuring `null` would throw an opaque TypeError; fail clearly instead.
    if (entry === null || typeof entry !== "object") {
      throw new Error("Each result must be an object");
    }

    const { message_id, status, flags, score, analysis } = entry as Partial<
      Record<"message_id" | "status" | "flags" | "score" | "analysis", unknown>
    >;

    // Validate message_id exists, is a requested target, and appears only once
    if (!message_id) {
      throw new Error("Result missing 'message_id'");
    }
    if (typeof message_id !== "string" || !targetIdSet.has(message_id)) {
      throw new Error(`Unknown message_id: ${String(message_id)}`);
    }
    if (foundIds.has(message_id)) {
      throw new Error(`Duplicate message_id in results: ${message_id}`);
    }
    foundIds.add(message_id);

    // Validate status
    if (!isModerationStatus(status)) {
      throw new Error(
        `Invalid status: ${String(status)}. Must be one of: ${MODERATION_STATUSES.join(", ")}`,
      );
    }

    return {
      messageId: message_id,
      status,
      flags: normalizeFlags(flags),
      score: normalizeScore(score),
      // Fallback analysis: a falsy value becomes the empty string
      analysis: analysis ? String(analysis) : "",
    };
  });

  // Check that all target IDs were found
  const missingIds = targetIds.filter((id) => !foundIds.has(id));
  if (missingIds.length > 0) {
    throw new Error(`Missing target ids in response: ${missingIds.join(", ")}`);
  }

  return results;
}
|
|
|
|
|
|
|
|
|
|
/** Arguments accepted by `runModerationAnalysis`. */
interface ModerationInput {
  /** Messages to analyze; the analysis throws when this array is empty. */
  targets: MessageRecord[];
  /** Text inserted into the prompt after the "Context:" label. */
  contextText: string;
}
|
|
|
|
|
|
|
|
|
|
/** Value resolved by `runModerationAnalysis`. */
interface ModerationOutput {
  /** Validated per-message verdicts produced by `parseModerationResponse`. */
  results: AnalysisResult[];
  /** The decoded HTTP response body exactly as returned by the LLM endpoint. */
  raw: unknown;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Runs LLM-based moderation analysis on messages.
 *
 * Builds a single-turn prompt from the target messages and context, POSTs it
 * to `${config.AI_LLM_BASE_URL}/chat/completions` with a bearer token, and
 * validates the model's reply with `parseModerationResponse`.
 *
 * Each HTTP attempt is aborted after `config.AI_ANALYSIS_TIMEOUT_MS` and runs
 * inside `retryWithBackoff`. Validation of the model's reply happens after
 * the retry wrapper returns, so a malformed reply is not retried here.
 *
 * @param input - Messages to analyze plus free-text context for the prompt.
 * @returns The validated results together with the raw decoded response body.
 * @throws Error when `targets` is empty, when the request fails (non-2xx
 *   status, abort, or undecodable body) and the retry wrapper gives up, when
 *   the response has no usable `choices[0].message.content`, or when
 *   `parseModerationResponse` rejects the content.
 */
export async function runModerationAnalysis(
  input: ModerationInput,
): Promise<ModerationOutput> {
  const { targets, contextText } = input;

  if (!targets.length) {
    throw new Error("No targets provided for analysis");
  }

  const targetIds = targets.map((t) => t.id);

  // Build prompt
  // NOTE(review): usernames, message contents and contextText are interpolated
  // verbatim, so a message can carry instructions aimed at the model (prompt
  // injection). The strict ID/status/score validation downstream limits the
  // damage but does not remove it. Confirm this is an accepted risk.
  const messagesText = targets
    .map((msg) => `[${msg.id}] ${msg.username}: ${msg.content}`)
    .join("\n");

  const prompt = `You are a content moderation assistant. Analyze the following messages for policy violations.

Context: ${contextText}

Messages to analyze:
${messagesText}

For each message, respond with a JSON object containing a "results" array. Each result must have:
- message_id: the message ID
- status: "clean", "warn", or "flagged"
- flags: array of violation flags (e.g., ["spam", "hate_speech"])
- score: confidence score from 0 to 1
- analysis: brief explanation

Return ONLY valid JSON, no other text.`;

  const result = await retryWithBackoff(
    async () => {
      // Fresh controller and timer per attempt, so a timeout only aborts this attempt.
      const controller = new AbortController();
      const timeoutId = setTimeout(
        () => controller.abort(),
        config.AI_ANALYSIS_TIMEOUT_MS,
      );

      try {
        const response = await fetch(
          `${config.AI_LLM_BASE_URL}/chat/completions`,
          {
            method: "POST",
            headers: {
              "Content-Type": "application/json",
              Authorization: `Bearer ${config.AI_LLM_API_KEY}`,
            },
            signal: controller.signal,
            body: JSON.stringify({
              model: config.AI_LLM_MODEL,
              messages: [
                {
                  role: "user",
                  content: prompt,
                },
              ],
              temperature: 0.3,
            }),
          },
        );

        if (!response.ok) {
          const text = await response.text();
          throw new Error(`LLM API error ${response.status}: ${text}`);
        }

        // Read the body as text first so a failed decode can be retried on a substring.
        const bodyText = await response.text();
        try {
          return JSON.parse(bodyText);
        } catch (e) {
          // Handle cases where the API provider returns trailing garbage
          const start = bodyText.indexOf("{");
          const end = bodyText.lastIndexOf("}");
          if (start !== -1 && end !== -1 && end > start) {
            return JSON.parse(bodyText.substring(start, end + 1));
          }
          throw e;
        }
      } finally {
        // The timer stays armed while the body is read and is always cleared afterwards.
        clearTimeout(timeoutId);
      }
    },
    {
      retries: 3,
      minTimeout: 1000,
      maxTimeout: 10000,
      logger: log,
    },
  );

  // Extract content from response. The body decodes to an arbitrary JSON value
  // (possibly `null` or a primitive), so check it is an object before reading
  // `choices`, which would otherwise throw a TypeError.
  const firstChoice =
    result && typeof result === "object" && Array.isArray(result.choices)
      ? result.choices[0]
      : undefined;
  if (!firstChoice) {
    throw new Error("Invalid LLM response structure");
  }

  // Content must be a non-empty string: the parser calls string methods on it.
  const content: unknown = firstChoice.message?.content;
  if (typeof content !== "string" || !content) {
    throw new Error("No content in LLM response");
  }

  // Parse and validate
  const parsed = parseModerationResponse(content, targetIds);

  log.info(
    {
      targetCount: targets.length,
      resultCount: parsed.length,
    },
    "Moderation analysis complete",
  );

  return {
    results: parsed,
    raw: result,
  };
}
|