tinylink/core/pipeline/translator.js

550 lines
16 KiB
JavaScript

const fs = require('fs');
const path = require('path');
// Parsed map files keyed by file path; each entry remembers the mtime it was parsed at.
const mapCache = new Map();

// Characters substituted for `<NAME>` tokens found in rendered template lines.
const CONTROL_TOKEN_MAP = {
  VT: '\u000b',
  FS: '\u001c',
  STX: '\u0002',
  ETX: '\u0003',
  CR: '\r',
  LF: '\n',
};

// Field selector: `TYPE[field]` or `TYPE[field.component]`, e.g. `R[3]` or `R[3.1]`.
const SELECTOR_PATTERN = /^([A-Za-z][A-Za-z0-9_]*)\[(\d+)(?:\.(\d+))?\]$/;

// Fixed-width directive: `name:length`, optionally followed by `:"text"` or `:'text'`.
const FIXED_WIDTH_DIRECTIVE_PATTERN = /^([A-Za-z][A-Za-z0-9_]*):(\d+)(?::(?:"([^"]*)"|'([^']*)'))?$/;
/**
 * Builds the canonical payload returned by the translator engines.
 *
 * The result is a shallow copy of `parsedPayload` whose `meta` is rebuilt from
 * the payload's own meta, the translator's `meta` object (if any), the
 * connector and the entry's `config` (stored as `instrument_config`).
 * Unless `translator.forceInstrumentId` is exactly `false`, `instrument_id`
 * is overwritten with the entry's instrument id.
 *
 * @param {object} entry - Instrument entry; reads `translator`, `instrument_id` and `config`.
 * @param {object} parsedPayload - Parsed payload to copy.
 * @param {*} connector - Stored as `meta.connector`.
 * @returns {object} The canonical payload.
 */
function buildCanonical(entry, parsedPayload, connector) {
  const configured = entry ? entry.translator : undefined;
  // `typeof null` is 'object', so null has to be excluded explicitly; a null
  // translator is treated the same as a missing one.
  const translator = configured !== null && typeof configured === 'object' ? configured : {};
  const translatorMeta = translator.meta && typeof translator.meta === 'object' ? translator.meta : {};

  const canonical = { ...parsedPayload };
  if (translator.forceInstrumentId !== false) {
    canonical.instrument_id = entry.instrument_id;
  }
  canonical.meta = {
    ...(parsedPayload.meta || {}),
    ...translatorMeta,
    connector,
    instrument_config: entry.config,
  };
  return canonical;
}
/**
 * Turns a translator file reference into a path to load.
 *
 * Absolute paths are returned unchanged. A relative path is tried against the
 * current working directory first and then, when `configFilePath` is given,
 * against the directory of that config file. The first candidate that exists
 * wins; if none exists the working-directory candidate is returned.
 *
 * @param {string} filePath - Translator file reference.
 * @param {string} [configFilePath] - Path of the config file that referenced it.
 * @returns {string} Resolved path, or '' when `filePath` is not a non-empty string.
 */
function resolveTranslatorFilePath(filePath, configFilePath) {
  const isUsable = typeof filePath === 'string' && filePath !== '';
  if (!isUsable) {
    return '';
  }
  if (path.isAbsolute(filePath)) {
    return filePath;
  }

  const fromCwd = path.resolve(process.cwd(), filePath);
  const fromConfigDir = configFilePath
    ? path.resolve(path.dirname(configFilePath), filePath)
    : null;

  if (fs.existsSync(fromCwd)) {
    return fromCwd;
  }
  if (fromConfigDir !== null && fs.existsSync(fromConfigDir)) {
    return fromConfigDir;
  }
  return fromCwd;
}
/**
 * Parses the text of a translator map file.
 *
 * Recognised line forms:
 *  - `KEY = value`: `field_sep` / `component_sep` update the settings, a value
 *    shaped like a selector becomes a field alias, anything else a message.
 *  - `# NAME` (a single word after `#`): opens a section whose body is the next
 *    non-blank line, or a `<<` ... `>>` block whose lines are kept verbatim.
 *  - any other line starting with `#` is ignored, as are blank lines.
 *
 * @param {string} fileContent - Raw file text.
 * @param {string} filePath - Used only in error messages.
 * @returns {{messages: Map<string, string>, fields: Map<string, string>, settings: object}}
 * @throws {Error} On a line without `=`, an empty key, an unterminated `<<`
 *   block, or a section header with no body.
 */
function parseMapFile(fileContent, filePath) {
  const messages = new Map();
  const fields = new Map();
  const settings = { field_sep: '|', component_sep: '^' };

  // Files a `KEY = value` line under settings, fields or messages.
  const storeAssignment = (rawLine, lineNumber) => {
    const equalsAt = rawLine.indexOf('=');
    if (equalsAt < 0) {
      throw new Error(`${filePath}:${lineNumber} invalid mapping line (expected KEY = value)`);
    }
    const key = rawLine.slice(0, equalsAt).trim();
    const value = rawLine.slice(equalsAt + 1).trim();
    if (!key) {
      throw new Error(`${filePath}:${lineNumber} mapping key is required`);
    }
    if (key === 'field_sep' || key === 'component_sep') {
      settings[key] = value;
    } else if (SELECTOR_PATTERN.test(value)) {
      fields.set(key, value);
    } else {
      messages.set(key, value);
    }
  };

  let awaitingBodyFor = null; // section header seen, body not read yet
  let openBlock = null; // { key, lines } while between `<<` and `>>`

  const rawLines = fileContent.split(/\r?\n/);
  for (let i = 0; i < rawLines.length; i += 1) {
    const rawLine = rawLines[i];
    const text = rawLine.trim();

    if (openBlock) {
      if (text === '>>') {
        messages.set(openBlock.key, openBlock.lines.join('\n'));
        openBlock = null;
      } else {
        // Block lines keep their original whitespace, blank lines included.
        openBlock.lines.push(rawLine);
      }
      continue;
    }

    if (text === '') {
      continue;
    }

    if (awaitingBodyFor) {
      if (text === '<<') {
        openBlock = { key: awaitingBodyFor, lines: [] };
      } else {
        messages.set(awaitingBodyFor, text);
      }
      awaitingBodyFor = null;
      continue;
    }

    const header = text.match(/^#\s*([A-Za-z0-9_.-]+)\s*$/);
    if (header && !text.includes('=')) {
      awaitingBodyFor = header[1];
      continue;
    }
    if (text.startsWith('#')) {
      continue;
    }
    storeAssignment(rawLine, i + 1);
  }

  if (openBlock) {
    throw new Error(`${filePath} unterminated multiline section for ${openBlock.key} (expected >>)`);
  }
  if (awaitingBodyFor) {
    throw new Error(`${filePath} section ${awaitingBodyFor} is missing a body line`);
  }
  return { messages, fields, settings };
}
/**
 * Reads and parses a map file, reusing the cached parse while the file's
 * modification time is unchanged.
 *
 * @param {string} filePath - Path of the map file.
 * @returns {object} The parsed map definition produced by `parseMapFile`.
 * @throws {Error} When the file cannot be stat'ed or read, or fails to parse.
 */
function loadMapFile(filePath) {
  const { mtimeMs } = fs.statSync(filePath);

  const hit = mapCache.get(filePath);
  if (hit && hit.mtimeMs === mtimeMs) {
    return hit.parsed;
  }

  const parsed = parseMapFile(fs.readFileSync(filePath, 'utf8'), filePath);
  mapCache.set(filePath, { mtimeMs, parsed });
  return parsed;
}
/**
 * Replaces `<VT>`, `<FS>`, `<STX>`, `<ETX>`, `<CR>` and `<LF>` tokens
 * (matched case-insensitively) with the characters from CONTROL_TOKEN_MAP.
 *
 * @param {*} value - Converted to a string before substitution.
 * @returns {string} The text with control tokens decoded.
 */
function decodeControlTokens(value) {
  const text = String(value);
  const substitute = (wholeMatch, tokenName) => {
    const character = CONTROL_TOKEN_MAP[tokenName.toUpperCase()];
    return character || '';
  };
  return text.replace(/<(VT|FS|STX|ETX|CR|LF)>/gi, substitute);
}
/**
 * Splits a selector such as `R[3]` or `R[3.1]` into its parts.
 *
 * @param {*} selector - Selector text; surrounding whitespace is ignored.
 * @returns {{recordType: string, fieldIndex: number, componentIndex: (number|null)}|null}
 *   The parts, with `componentIndex` null when no component was given, or
 *   null when the text is not a selector.
 */
function parseSelector(selector) {
  const text = String(selector || '').trim();
  const groups = text.match(SELECTOR_PATTERN);
  if (groups === null) {
    return null;
  }
  const [, recordType, fieldDigits, componentDigits] = groups;
  return {
    recordType,
    fieldIndex: Number(fieldDigits),
    componentIndex: componentDigits ? Number(componentDigits) : null,
  };
}
/**
 * Parses one delimited record line.
 *
 * STX, ETX, VT and FS characters are removed and the line is trimmed before
 * it is split on `fieldSeparator`. The first field, trimmed, is the record type.
 *
 * @param {*} line - Line text.
 * @param {string} fieldSeparator - Separator between fields.
 * @returns {{type: string, fields: string[], raw: string}|null} The record, or
 *   null when the line is empty, has no separator, or has an empty first field.
 */
function parseRecordLine(line, fieldSeparator) {
  const withoutFraming = String(line || '').replace(/[\u0002\u0003\u000b\u001c]/g, '');
  const raw = withoutFraming.trim();
  if (raw === '' || !raw.includes(fieldSeparator)) {
    return null;
  }
  const fields = raw.split(fieldSeparator);
  const type = String(fields[0] || '').trim();
  return type ? { type, fields, raw } : null;
}
/**
 * Collects every string in the payload that may hold the raw message text.
 *
 * Candidates, in order: `raw_payload`, `meta.raw_payload`, then the `value`
 * of each entry in `results` whose `test_code` is `RAW` (any letter case).
 * Non-string values are skipped.
 *
 * @param {object} parsedPayload - Parsed payload.
 * @returns {string[]} Candidate raw payload strings.
 */
function extractRawPayloadCandidates(parsedPayload) {
  const found = [];

  const topLevel = parsedPayload.raw_payload;
  if (typeof topLevel === 'string') {
    found.push(topLevel);
  }

  const fromMeta = parsedPayload.meta?.raw_payload;
  if (typeof fromMeta === 'string') {
    found.push(fromMeta);
  }

  if (Array.isArray(parsedPayload.results)) {
    for (const result of parsedPayload.results) {
      if (!result) {
        continue;
      }
      const isRawResult = String(result.test_code || '').toUpperCase() === 'RAW';
      if (isRawResult && typeof result.value === 'string') {
        found.push(result.value);
      }
    }
  }
  return found;
}
/**
 * Removes leading and trailing STX, ETX, VT, FS, CR and LF characters.
 * Characters in the middle of the text are left alone.
 *
 * @param {*} value - Falsy values are treated as an empty string.
 * @returns {string} The text without framing characters at either end.
 */
function stripFrameControlChars(value) {
  const text = String(value || '');
  const withoutLeading = text.replace(/^[\u0002\u0003\u000b\u001c\r\n]+/, '');
  return withoutLeading.replace(/[\u0002\u0003\u000b\u001c\r\n]+$/, '');
}
/**
 * Picks the text that fixed-width directives read from: the first raw payload
 * candidate that is non-empty once its framing characters are stripped.
 *
 * @param {object} parsedPayload - Parsed payload.
 * @returns {string} The stripped text, or '' when no candidate has content.
 */
function getFixedWidthSource(parsedPayload) {
  for (const candidate of extractRawPayloadCandidates(parsedPayload)) {
    const body = stripFrameControlChars(candidate);
    if (body) {
      return body;
    }
  }
  return '';
}
/**
 * Splits a raw payload into lines (CRLF, CR or LF terminated) and parses each
 * one with `parseRecordLine`, dropping lines that do not parse.
 *
 * @param {*} rawPayload - Raw payload text; falsy values yield no records.
 * @param {string} fieldSeparator - Separator between fields.
 * @returns {object[]} The parsed records, in line order.
 */
function parseRawPayloadRecords(rawPayload, fieldSeparator) {
  const text = String(rawPayload || '')
    .replace(/\r\n/g, '\n')
    .replace(/\r/g, '\n');

  const records = [];
  for (const line of text.split('\n')) {
    const record = parseRecordLine(line, fieldSeparator);
    if (record) {
      records.push(record);
    }
  }
  return records;
}
/**
 * Builds the list of records available to templates, plus an index by type.
 *
 * Records come from `parsedPayload.records` when that is an array, otherwise
 * from `parsedPayload.meta.records`. When that list is empty, the first raw
 * payload candidate that yields any parsed record lines is used instead.
 * String items are parsed as record lines; object items need a truthy `type`
 * and an array `fields`, and have every field converted to a string.
 *
 * @param {object} parsedPayload - Parsed payload.
 * @param {object} settings - Map settings; `field_sep` defaults to '|'.
 * @returns {{records: object[], recordsByType: Map<string, object[]>}}
 */
function buildRecordCollections(parsedPayload, settings) {
  let explicit = [];
  if (Array.isArray(parsedPayload.records)) {
    explicit = parsedPayload.records;
  } else if (Array.isArray(parsedPayload.meta?.records)) {
    explicit = parsedPayload.meta.records;
  }
  const source = [...explicit];
  const fieldSeparator = settings.field_sep || '|';

  if (source.length === 0) {
    // No explicit records: fall back to the first raw payload that parses.
    for (const candidate of extractRawPayloadCandidates(parsedPayload)) {
      const parsedLines = parseRawPayloadRecords(candidate, fieldSeparator);
      if (parsedLines.length > 0) {
        for (const parsedLine of parsedLines) {
          source.push(parsedLine.raw);
        }
        break;
      }
    }
  }

  const records = [];
  for (const item of source) {
    if (typeof item === 'string') {
      const record = parseRecordLine(item, fieldSeparator);
      if (record) {
        records.push(record);
      }
    } else if (item && typeof item === 'object' && Array.isArray(item.fields) && item.type) {
      records.push({
        type: String(item.type),
        fields: item.fields.map((value) => String(value ?? '')),
        raw: '',
      });
    }
  }

  const recordsByType = new Map();
  for (const record of records) {
    const bucket = recordsByType.get(record.type);
    if (bucket) {
      bucket.push(record);
    } else {
      recordsByType.set(record.type, [record]);
    }
  }
  return { records, recordsByType };
}
/**
 * Resolves a selector such as `R[3]` or `R[3.1]` against the records in
 * `context`.
 *
 * The current loop record is used when its type matches the selector;
 * otherwise the first record of that type is used. Field and component
 * indexes are 1-based, and field 1 is the first element of `record.fields`.
 *
 * @param {string} selector - Selector text.
 * @param {object} context - Template context; reads `currentRecord`,
 *   `recordsByType` and `settings.component_sep` (default '^').
 * @returns {string} The selected field or component, or '' when the selector
 *   is invalid, an index is below 1, or nothing matches.
 */
function resolveSelector(selector, context) {
  const parsed = parseSelector(selector);
  if (!parsed) return '';

  const { recordType, fieldIndex, componentIndex } = parsed;
  if (fieldIndex < 1) return '';

  const record = context.currentRecord && context.currentRecord.type === recordType
    ? context.currentRecord
    : (context.recordsByType.get(recordType) || [])[0];
  if (!record) return '';

  const field = record.fields[fieldIndex - 1];
  if (field === undefined || field === null) return '';

  // Only a missing component means "whole field". A truthiness test here
  // would also swallow component 0, which is an out-of-range index.
  if (componentIndex === null || componentIndex === undefined) return field;
  if (componentIndex < 1) return '';

  const components = String(field).split(context.settings.component_sep || '^');
  return components[componentIndex - 1] || '';
}
/**
 * Resolves a field alias declared in the map file to its value.
 *
 * @param {string} name - Alias name.
 * @param {object} context - Template context; `fields` maps aliases to selectors.
 * @param {Set<string>} [stack] - Aliases currently being resolved; a name
 *   already in the set resolves to ''.
 * @returns {string} The selector's value, or '' for an unknown or in-progress alias.
 */
function resolveFieldAlias(name, context, stack = new Set()) {
  const isResolvable = !stack.has(name) && context.fields.has(name);
  if (!isResolvable) {
    return '';
  }
  stack.add(name);
  const resolved = resolveSelector(context.fields.get(name), context);
  stack.delete(name);
  return resolved;
}
/**
 * Looks up the value for a `{name}` placeholder.
 *
 * Resolution order:
 *  1. an own property of `context.flat`;
 *  2. a field alias from the map file;
 *  3. an inline selector such as `R[3.1]`;
 *  4. a dotted path walked from `context.root`, e.g. `meta.site.code`.
 *
 * The dotted path follows own properties only, matching the own-property
 * check used for `context.flat`. Inherited members such as `constructor` or
 * `toString` therefore resolve to '' instead of leaking into the output.
 *
 * @param {string} name - Placeholder name, already trimmed.
 * @param {object} context - Template context; reads `flat`, `fields` and `root`.
 * @returns {*} The value found, or '' when nothing matches or the value is
 *   null/undefined.
 */
function getPlaceholderValue(name, context) {
  if (Object.hasOwn(context.flat, name)) {
    return context.flat[name];
  }
  if (context.fields.has(name)) {
    return resolveFieldAlias(name, context);
  }
  if (SELECTOR_PATTERN.test(name)) {
    return resolveSelector(name, context);
  }
  if (!name.includes('.')) {
    return '';
  }

  const parts = name.split('.').filter(Boolean);
  // A name made only of dots has no segments; it must not expose the whole root.
  if (parts.length === 0) {
    return '';
  }

  let current = context.root;
  for (const part of parts) {
    const canDescend = current !== null
      && typeof current === 'object'
      && Object.hasOwn(current, part);
    if (!canDescend) {
      return '';
    }
    current = current[part];
  }
  return current === undefined || current === null ? '' : current;
}
/**
 * Recognises a fixed-width placeholder such as `name:10`.
 *
 * The optional quoted third segment accepted by the pattern is matched but
 * not returned.
 *
 * @param {*} name - Placeholder text.
 * @returns {{fieldName: string, length: number}|null} The directive, or null
 *   when the text is not a fixed-width directive.
 */
function parseFixedWidthDirective(name) {
  const groups = String(name || '').match(FIXED_WIDTH_DIRECTIVE_PATTERN);
  if (groups === null) {
    return null;
  }
  const [, fieldName, width] = groups;
  return { fieldName, length: Number(width) };
}
/**
 * Reads the next `length` characters from the fixed-width source and advances
 * the shared cursor past them.
 *
 * A directive named `skip` (any letter case) still advances the cursor but
 * yields ''. Reads past the end of the source return whatever is left.
 *
 * @param {{fieldName: string, length: number}|null} directive - Parsed directive.
 * @param {object} context - Template context; `fixedWidth` holds `source` and `cursor`.
 * @returns {string|null} The consumed text, '' for a skip or an invalid
 *   length, or null when there is no directive or no fixed-width state.
 */
function consumeFixedWidthField(directive, context) {
  if (!directive) {
    return null;
  }
  const state = context.fixedWidth;
  if (!state) {
    return null;
  }

  const width = directive.length;
  const hasUsableWidth = Number.isFinite(width) && width >= 0;
  if (!hasUsableWidth) {
    return '';
  }

  const from = state.cursor;
  const to = from + width;
  const consumed = state.source.slice(from, to);
  state.cursor = to;

  return directive.fieldName.toLowerCase() === 'skip' ? '' : consumed;
}
/**
 * Recognises the two loop headers a template line can hold:
 * `@for NAME` (one pass per record of type NAME) and `@for NAME in A..B`
 * (one pass per integer between A and B).
 *
 * @param {string} value - Trimmed template line.
 * @returns {{type: 'record', variable: string}
 *   |{type: 'range', variable: string, start: number, end: number}|null}
 *   The loop description, or null when the line is not a loop header.
 */
function parseLoopDirective(value) {
  const asRecordLoop = value.match(/^@for\s+([A-Za-z][A-Za-z0-9_]*)$/);
  if (asRecordLoop) {
    const [, variable] = asRecordLoop;
    return { type: 'record', variable };
  }

  const asRangeLoop = value.match(/^@for\s+([A-Za-z][A-Za-z0-9_]*)\s+in\s+(\d+)\.\.(\d+)$/);
  if (asRangeLoop === null) {
    return null;
  }
  const [, variable, first, last] = asRangeLoop;
  return {
    type: 'range',
    variable,
    start: Number(first),
    end: Number(last),
  };
}
/**
 * Finds the `@end` that closes the loop opened at `loopIndex`, counting nested
 * `@for` headers so an inner loop's `@end` is not mistaken for the outer one.
 *
 * @param {string[]} lines - Template lines.
 * @param {number} loopIndex - Index of the `@for` line.
 * @returns {number} Index of the matching `@end`, or -1 when there is none.
 */
function findMatchingLoopEnd(lines, loopIndex) {
  let depth = 1;
  for (let i = loopIndex + 1; i < lines.length; i += 1) {
    const text = lines[i].trim();
    if (text === '@end') {
      depth -= 1;
      if (depth === 0) return i;
    } else if (parseLoopDirective(text)) {
      depth += 1;
    }
  }
  return -1;
}

/**
 * Renders a template: expands `@for ... @end` loops, substitutes `{name}`
 * placeholders and decodes `<CR>`-style control tokens, line by line.
 *
 * Record loops render their body once per record of the named type, with that
 * record as `currentRecord`. Range loops render once per integer from start
 * to end (counting down when start > end), exposing the loop variable as a
 * flat placeholder. Loops may be nested. An iteration that renders to an
 * empty string contributes no output.
 *
 * @param {*} template - Template text; converted to a string.
 * @param {object} context - Template context (see `buildTemplateContext`).
 * @returns {string} Rendered text, lines joined with '\n'.
 * @throws {Error} On a `@for` without a matching `@end`, or a stray `@end`.
 */
function renderTemplate(template, context) {
  const lines = String(template).split('\n');
  const outputLines = [];

  for (let index = 0; index < lines.length; index += 1) {
    const line = lines[index];
    const trimmed = line.trim();
    const loop = parseLoopDirective(trimmed);

    if (loop) {
      const endIndex = findMatchingLoopEnd(lines, index);
      if (endIndex < 0) {
        throw new Error(`unterminated loop block for ${trimmed} (expected @end)`);
      }
      const loopBody = lines.slice(index + 1, endIndex).join('\n');
      const renderIteration = (iterationContext) => {
        const body = renderTemplate(loopBody, iterationContext);
        if (body) outputLines.push(body);
      };

      if (loop.type === 'record') {
        const records = context.recordsByType.get(loop.variable) || [];
        for (const record of records) {
          renderIteration({ ...context, currentRecord: record });
        }
      } else {
        const step = loop.start <= loop.end ? 1 : -1;
        for (let value = loop.start; step > 0 ? value <= loop.end : value >= loop.end; value += step) {
          renderIteration({
            ...context,
            flat: { ...context.flat, [loop.variable]: value },
          });
        }
      }

      // Resume after the loop's @end.
      index = endIndex;
      continue;
    }

    if (trimmed === '@end') {
      throw new Error('unexpected @end without matching @for');
    }

    const rendered = line.replace(/\{([^{}]+)\}/g, (_, rawName) => {
      const name = String(rawName || '').trim();
      if (!name) return '';
      const fixedDirective = parseFixedWidthDirective(name);
      const value = fixedDirective
        ? consumeFixedWidthField(fixedDirective, context)
        : getPlaceholderValue(name, context);
      // String.prototype.replace stringifies whatever the callback returns, so
      // a null from either lookup must be mapped to '' here or it renders as "null".
      return value === undefined || value === null ? '' : String(value);
    });
    outputLines.push(decodeControlTokens(rendered));
  }

  return outputLines.join('\n');
}
/**
 * Assembles the context object that template rendering reads from.
 *
 * `root` is the payload plus `instrument_id` (payload value first, entry value
 * as fallback), `connector`, `config` and `meta`. `flat` layers root, then
 * meta, then config into one object, so config keys win over meta keys, which
 * win over payload keys. When the payload has a `results` array, `flat` also
 * gets `order_tests`: each truthy `test_code` prefixed with `^^^`, joined by a
 * backslash.
 *
 * @param {object} entry - Instrument entry; reads `instrument_id` and `config`.
 * @param {object} parsedPayload - Parsed payload.
 * @param {*} connector - Stored on `root.connector`.
 * @param {object} mapDefinition - Parsed map file; reads `fields` and `settings`.
 * @returns {object} Context with `root`, `flat`, `fields`, `settings`,
 *   `records`, `recordsByType`, `currentRecord` and `fixedWidth`.
 */
function buildTemplateContext(entry, parsedPayload, connector, mapDefinition) {
  const root = {
    ...parsedPayload,
    instrument_id: parsedPayload.instrument_id || entry.instrument_id,
    connector,
    config: entry.config || {},
    meta: parsedPayload.meta || {},
  };

  const metaLayer = root.meta && typeof root.meta === 'object' ? root.meta : {};
  const configLayer = root.config && typeof root.config === 'object' ? root.config : {};
  const flat = { ...root, ...metaLayer, ...configLayer };

  if (Array.isArray(parsedPayload.results)) {
    const orderTokens = [];
    for (const item of parsedPayload.results) {
      const testCode = item && item.test_code;
      if (testCode) {
        orderTokens.push(`^^^${testCode}`);
      }
    }
    flat.order_tests = orderTokens.join('\\');
  }

  const collections = buildRecordCollections(parsedPayload, mapDefinition.settings || {});

  return {
    root,
    flat,
    fields: mapDefinition.fields || new Map(),
    settings: mapDefinition.settings || {},
    records: collections.records,
    recordsByType: collections.recordsByType,
    currentRecord: null,
    fixedWidth: {
      source: getFixedWidthSource(parsedPayload),
      cursor: 0,
    },
  };
}
/**
 * "overrides" engine: merges `translator.overrides` over the parsed payload
 * and hands the result to `buildCanonical`.
 *
 * @param {object} entry - Instrument entry; reads `translator.overrides`.
 * @param {object} parsedPayload - Parsed payload.
 * @param {*} connector - Passed through to `buildCanonical`.
 * @returns {object} The canonical payload.
 */
function translateOverrides(entry, parsedPayload, connector) {
  const configured = entry ? entry.translator : undefined;
  // `typeof null` is 'object'; a null translator is treated as "no translator"
  // so that reading `.overrides` below cannot throw.
  const translator = configured !== null && typeof configured === 'object' ? configured : {};
  const overrides = translator.overrides !== null && typeof translator.overrides === 'object'
    ? translator.overrides
    : {};
  return buildCanonical(entry, { ...parsedPayload, ...overrides }, connector);
}
/**
 * "template" engine: renders messages from a map file and attaches them to the
 * canonical payload.
 *
 * The map file named by `translator.file` is resolved and loaded. Either the
 * keys listed in `translator.messages` or, when that list is absent or empty,
 * every message in the file is rendered. Each message starts with its own
 * fixed-width cursor at 0. The results are stored as
 * `meta.rendered_messages` (`{ key, body }` objects) next to
 * `meta.translator_file` (the resolved path).
 *
 * @param {object} entry - Instrument entry; reads `translator` and `files.config`.
 * @param {object} parsedPayload - Parsed payload.
 * @param {*} connector - Passed through to the context and canonical payload.
 * @returns {object} The canonical payload with rendered messages in `meta`.
 * @throws {Error} When `translator.file` is missing or not a string, the file
 *   does not exist, or a requested message key is not in the map file.
 */
function translateTemplate(entry, parsedPayload, connector) {
  const configured = entry ? entry.translator : undefined;
  // `typeof null` is 'object'; treating null as "no translator" lets the
  // descriptive error below fire instead of a TypeError on `.file`.
  const translator = configured !== null && typeof configured === 'object' ? configured : {};
  if (!translator.file || typeof translator.file !== 'string') {
    throw new Error('translator.file is required for template engine');
  }

  const resolvedFilePath = resolveTranslatorFilePath(translator.file, entry?.files?.config);
  if (!fs.existsSync(resolvedFilePath)) {
    throw new Error(`translator file not found: ${translator.file}`);
  }

  const mapDefinition = loadMapFile(resolvedFilePath);
  const messageKeys = Array.isArray(translator.messages) && translator.messages.length
    ? translator.messages.map((value) => String(value))
    : Array.from(mapDefinition.messages.keys());

  const context = buildTemplateContext(entry, parsedPayload, connector, mapDefinition);
  const renderedMessages = messageKeys.map((messageKey) => {
    if (!mapDefinition.messages.has(messageKey)) {
      throw new Error(`translator message key not found in map file: ${messageKey}`);
    }
    // Every message reads the fixed-width source from the beginning.
    const messageContext = {
      ...context,
      fixedWidth: { ...context.fixedWidth, cursor: 0 },
    };
    return {
      key: messageKey,
      body: renderTemplate(mapDefinition.messages.get(messageKey), messageContext),
    };
  });

  const canonical = buildCanonical(entry, parsedPayload, connector);
  canonical.meta.rendered_messages = renderedMessages;
  canonical.meta.translator_file = resolvedFilePath;
  return canonical;
}
// Translator engines by lowercase name. Each engine exposes
// translate(entry, parsedPayload, connector).
const registry = new Map();
registry.set('overrides', { translate: translateOverrides });
registry.set('template', { translate: translateTemplate });
/**
 * Looks up a translator engine by name.
 *
 * @param {*} [name] - Engine name; trimmed and lower-cased before lookup.
 *   A falsy name selects the `overrides` engine.
 * @returns {object|null} The engine, or null when no engine has that name.
 */
function resolve(name) {
  if (name) {
    const key = String(name).trim().toLowerCase();
    const engine = registry.get(key);
    return engine || null;
  }
  return registry.get('overrides');
}
/**
 * Translates a parsed payload with the named engine.
 *
 * @param {object} entry - Instrument entry, passed to the engine.
 * @param {object} parsedPayload - Parsed payload, passed to the engine.
 * @param {*} connector - Passed to the engine.
 * @param {string} [engineName] - Engine to use; see `resolve` for the default.
 * @returns {*} Whatever the engine's `translate` returns.
 * @throws {Error} When no engine matches `engineName`.
 */
function translate(entry, parsedPayload, connector, engineName) {
  const engine = resolve(engineName);
  if (engine) {
    return engine.translate(entry, parsedPayload, connector);
  }
  const options = engineName ? ` (requested: ${engineName})` : '';
  throw new Error(`translator engine not found${options}`);
}
// Public API: engine lookup (`resolve`) and the `translate` entry point.
// Every other function in this file is internal.
module.exports = {
resolve,
translate
};