This commit is contained in:
2026-05-10 19:12:02 +02:00
commit 3398982ca8
19 changed files with 2152 additions and 0 deletions

170
src/routes/api.js Normal file
View File

@@ -0,0 +1,170 @@
const express = require("express");
const { ObjectId } = require("mongodb");
const {
assertDataset,
DATASETS,
normalizeDatasetList,
normalizeLanguageList,
} = require("../datasets");
const { getDb } = require("../db/client");
const { getImportStatus, startImport } = require("../importer/importer");
// Express router that the route handlers in this module are registered on.
const router = express.Router();
/**
 * Parse a 1-based page number from a raw query-string value.
 *
 * Missing/empty values default to 1. Anything that is not a positive safe
 * integer (fractions, text, zero, negatives, out-of-range magnitudes) also
 * falls back to 1 instead of raising.
 *
 * @param {*} value - Raw `page` value as received from the request.
 * @returns {number} A positive safe integer page number.
 */
function parsePage(value) {
  const page = Number(value || 1);
  // Number.isInteger accepts values like 1e300; multiplying such a page by the
  // page size produces a skip offset that is not a usable integer. Restrict to
  // the safe-integer range so the derived offset stays well-formed.
  return Number.isSafeInteger(page) && page > 0 ? page : 1;
}
/**
 * Parse a page-size value, clamping it to an upper bound.
 *
 * @param {*} value - Raw limit value; a falsy value means "use `fallback`".
 * @param {number} fallback - Size used when `value` is missing or invalid.
 * @param {number} max - Largest size that may be returned for a valid value.
 * @returns {number} `min(parsed, max)` when the parsed value is a positive
 *   integer; otherwise `fallback` exactly as given.
 */
function parseLimit(value, fallback, max) {
  const requested = Number(value || fallback);
  const isPositiveInteger = Number.isInteger(requested) && requested > 0;
  return isPositiveInteger ? Math.min(requested, max) : fallback;
}
/**
 * Parse the optional `maxPages` import option.
 *
 * @param {*} value - Raw value; `undefined`, `null` and `""` mean "not set".
 * @returns {number|undefined} The positive integer, or `undefined` when the
 *   option was not supplied.
 * @throws {Error} With `status` 400 when a supplied value is not a positive
 *   integer.
 */
function parseMaxPages(value) {
  const isUnset = value === undefined || value === null || value === "";
  if (isUnset) {
    return undefined;
  }

  const parsed = Number(value);
  if (Number.isInteger(parsed) && parsed > 0) {
    return parsed;
  }

  const error = new Error("maxPages must be a positive integer");
  error.status = 400;
  throw error;
}
/**
 * Build the language part of a collection filter.
 *
 * @param {*} language - Requested language; a falsy value means "no filter".
 * @returns {object} `{}` when no language was given, otherwise
 *   `{ language }` holding the first entry that `normalizeLanguageList`
 *   returns for the single requested language.
 */
function buildLanguageFilter(language) {
  if (language) {
    const [primaryLanguage] = normalizeLanguageList([language]);
    return { language: primaryLanguage };
  }
  return {};
}
/**
 * Build the text-search part of a collection filter.
 *
 * @param {*} query - Search terms; a falsy value means "no search".
 * @returns {object} `{}` when no query was given, otherwise a `$text`
 *   filter whose `$search` is the query coerced to a string.
 */
function buildSearchFilter(query) {
  return query ? { $text: { $search: String(query) } } : {};
}
// GET /datasets — respond with every entry of DATASETS as a JSON array.
router.get("/datasets", (req, res) => {
  const datasets = Object.values(DATASETS);
  res.json({ datasets });
});
// GET /import/status — respond with whatever getImportStatus() reports.
router.get("/import/status", (req, res) => {
  const status = getImportStatus();
  res.json(status);
});
// POST /import — validate the requested options, kick off an import and
// answer 202 with the current import status. Validation or start-up errors
// are forwarded to the Express error handler.
router.post("/import", (req, res, next) => {
  try {
    const payload = req.body || {};
    const datasets = normalizeDatasetList(payload.datasets);
    const languages = normalizeLanguageList(payload.languages);
    const maxPages = parseMaxPages(payload.maxPages);

    startImport({ datasets, languages, maxPages });

    res.status(202);
    res.json({ message: "Import started", status: getImportStatus() });
  } catch (error) {
    next(error);
  }
});
// GET /search — run the same text search against each requested dataset and
// respond with `{ query, results }`, where `results` maps dataset key to the
// matching documents (at most `limit` per dataset, `searchText` omitted).
//
// Query parameters read here:
//   q        required, trimmed; a 400 error is raised when empty
//   datasets passed to normalizeDatasetList to obtain the dataset keys
//   language optional language filter
//   limit    per-dataset result cap (default 10, maximum 50)
//
// NOTE(review): no sort is applied, so documents come back in the cursor's
// default order rather than by text score — confirm that is intended.
router.get("/search", async (request, response, next) => {
  try {
    const query = String(request.query.q || "").trim();
    if (!query) {
      const error = new Error("q is required");
      error.status = 400;
      throw error;
    }

    const datasetKeys = normalizeDatasetList(request.query.datasets);
    const languageFilter = buildLanguageFilter(request.query.language);
    const limit = parseLimit(request.query.limit, 10, 50);
    // The filter does not depend on the dataset, so build it once.
    const filter = { ...languageFilter, ...buildSearchFilter(query) };
    const db = getDb();

    // The per-dataset queries are independent of each other, so issue them
    // concurrently instead of awaiting each one in turn. Array.from accepts
    // any iterable and keeps the entries in the original key order.
    const entries = await Promise.all(
      Array.from(datasetKeys, async (datasetKey) => {
        const dataset = DATASETS[datasetKey];
        const documents = await db
          .collection(dataset.collection)
          // Hand each query its own copy of the shared filter.
          .find({ ...filter })
          .project({ searchText: 0 })
          .limit(limit)
          .toArray();
        return [datasetKey, documents];
      }),
    );

    response.json({ query, results: Object.fromEntries(entries) });
  } catch (error) {
    next(error);
  }
});
// GET /:dataset — paginated listing of one dataset's collection, optionally
// narrowed by `language` and a text query `q`. Responds with
// `{ dataset, page, limit, total, items }`; `searchText` is left out of items.
router.get("/:dataset", async (req, res, next) => {
  try {
    const dataset = assertDataset(req.params.dataset);
    const page = parsePage(req.query.page);
    const limit = parseLimit(req.query.limit, 25, 100);
    const offset = (page - 1) * limit;
    const filter = {
      ...buildLanguageFilter(req.query.language),
      ...buildSearchFilter(req.query.q),
    };
    const collection = getDb().collection(dataset.collection);

    // Start the page fetch and the total count together, then wait for both.
    const pendingItems = collection
      .find(filter)
      .project({ searchText: 0 })
      .skip(offset)
      .limit(limit)
      .toArray();
    const pendingTotal = collection.countDocuments(filter);
    const [items, total] = await Promise.all([pendingItems, pendingTotal]);

    res.json({ dataset: dataset.key, page, limit, total, items });
  } catch (error) {
    next(error);
  }
});
// GET /:dataset/:id — fetch a single document from a dataset's collection.
// The id is matched against `sourceId`, and additionally against `_id` when
// it is a valid ObjectId. Responds 404 with `{ error: "Not found" }` when
// nothing matches; `searchText` is omitted from the returned document.
router.get("/:dataset/:id", async (req, res, next) => {
  try {
    const dataset = assertDataset(req.params.dataset);
    const { id } = req.params;
    const languageFilter = buildLanguageFilter(req.query.language);

    const bySourceId = { sourceId: id };
    const idFilter = ObjectId.isValid(id)
      ? { $or: [{ _id: new ObjectId(id) }, bySourceId] }
      : bySourceId;

    const collection = getDb().collection(dataset.collection);
    const found = await collection.findOne(
      { ...languageFilter, ...idFilter },
      { projection: { searchText: 0 } },
    );

    if (found) {
      res.json(found);
      return;
    }
    res.status(404).json({ error: "Not found" });
  } catch (error) {
    next(error);
  }
});
// The router is exported as a named property of the module's exports object.
module.exports = { router };