417 lines
9.5 KiB
JavaScript
417 lines
9.5 KiB
JavaScript
// graph-utils.js
|
|
//@7
|
|
/*
  Consumes : flows.json
           : selectors.json
           : status.json (optional; treated as empty when it cannot be read)
  Produces : graph object consumed by graph-data.js
  Short friendly labels enabled
*/
|
|
|
|
// Load-time trace: printed once, when this module is evaluated.
console.log("Loading: graph-utils.js");
|
|
|
|
import fs from "fs";
|
|
|
|
/** Damping factor applied on every PageRank pass. */
const PAGERANK_DAMPING = 0.85;

/** Number of PageRank passes; the iteration is fixed-length, not convergence-based. */
const PAGERANK_ITERATIONS = 20;

/** The cycle search stops as soon as more than this many cycles have been recorded. */
const CYCLE_LIMIT = 50;

/** Position (as a fraction of the node count) in the descending rank list used for `rankThreshold`. */
const TOP_RANK_FRACTION = 0.1;

/**
 * Canonical node id for a URL-like value: stringified, trimmed, trailing
 * slashes removed, lower-cased. Falsy input yields "".
 */
function normalizeUrl(url) {
  return String(url || "")
    .trim()
    .replace(/\/+$/, "")
    .toLowerCase();
}

/**
 * Path component of an absolute URL. Values that `new URL()` rejects
 * (relative paths, synthetic "#ui:" / "#section:" ids) are returned unchanged.
 */
function shortPath(url) {
  try {
    return new URL(url).pathname || "/";
  } catch {
    return url;
  }
}

/**
 * First path segment of a path-like string, or "root" when there is none.
 *
 * The fragment is dropped first so synthetic ids such as "/shop#ui:buy"
 * land in the same cluster as their page ("shop"), and an empty first
 * segment (the site root "/") maps to "root" instead of "".
 */
function clusterName(pathLike) {
  const withoutFragment = String(pathLike).split("#")[0];
  return withoutFragment.split("/")[1] || "root";
}

/**
 * Best-effort JSON read. Returns null when the file cannot be read or parsed.
 * A missing file (ENOENT) is silent; any other failure is reported with
 * console.warn so a corrupt file does not go unnoticed.
 */
function readOptionalJson(file) {
  try {
    return JSON.parse(fs.readFileSync(file, "utf8"));
  } catch (err) {
    if (!err || err.code !== "ENOENT") {
      console.warn(`graph-utils: ignoring unreadable ${file}: ${err && err.message}`);
    }
    return null;
  }
}

/**
 * Runs PAGERANK_ITERATIONS passes of PageRank over the edge list.
 * Parallel edges count once each. Rank held by nodes without outgoing
 * edges is not redistributed.
 *
 * @param {string[]} nodeIds - node ids; the result is aligned with this array
 * @param {{from: string, to: string}[]} edges
 * @returns {number[]} rank per node
 */
function computePageRank(nodeIds, edges) {
  const count = nodeIds.length;
  const position = new Map(nodeIds.map((id, i) => [id, i]));
  const incoming = Array.from({ length: count }, () => []);
  const outDegree = new Array(count).fill(0);

  for (const edge of edges) {
    const source = position.get(edge.from);
    const target = position.get(edge.to);
    if (source == null || target == null) continue;
    outDegree[source]++;
    incoming[target].push(source);
  }

  let rank = new Array(count).fill(1 / count);
  for (let pass = 0; pass < PAGERANK_ITERATIONS; pass++) {
    const next = new Array(count).fill((1 - PAGERANK_DAMPING) / count);
    for (let i = 0; i < count; i++) {
      for (const j of incoming[i]) {
        if (outDegree[j] > 0) next[i] += PAGERANK_DAMPING * (rank[j] / outDegree[j]);
      }
    }
    rank = next;
  }
  return rank;
}

/**
 * Depth-first search for back edges. Each back edge contributes the slice of
 * the current path from the revisited node to the current node. Uses an
 * explicit frame stack rather than recursion so long link chains cannot
 * overflow the call stack.
 *
 * @param {string[]} nodeIds - search roots, tried in order
 * @param {Map<string, string[]>} adjacency - outgoing neighbours per node id
 * @returns {string[][]} at most CYCLE_LIMIT + 1 cycles
 */
function findCycles(nodeIds, adjacency) {
  const visited = new Set();
  const depthOnPath = new Map(); // node id -> its index in `path`
  const cycles = [];

  for (const root of nodeIds) {
    if (visited.has(root)) continue;

    const path = [];
    const frames = [];
    const enter = node => {
      visited.add(node);
      depthOnPath.set(node, path.length);
      path.push(node);
      frames.push({ node, next: 0 });
    };

    enter(root);
    while (frames.length > 0) {
      const frame = frames[frames.length - 1];
      const neighbours = adjacency.get(frame.node);

      if (frame.next >= neighbours.length) {
        // All neighbours handled: leave this node.
        frames.pop();
        path.pop();
        depthOnPath.delete(frame.node);
        continue;
      }

      const target = neighbours[frame.next++];
      if (depthOnPath.has(target)) {
        cycles.push(path.slice(depthOnPath.get(target)));
        if (cycles.length > CYCLE_LIMIT) return cycles;
      } else if (!visited.has(target)) {
        enter(target);
      }
    }
  }
  return cycles;
}

/**
 * Builds the site graph from the crawler output in the current working directory.
 *
 * Reads ./flows.json (its `flows` property: arrays of URLs visited in order),
 * ./selectors.json (per page: `links[].to`, `forms[].action`,
 * `ui[].friendly`, `sections[].friendly`) and, when readable, ./status.json
 * (per URL: `status`, `finalUrl`, `soft404`).
 *
 * @returns {{
 *   nodes: Map<string, {fullUrl: *, label: *, cluster: string, types: Set<string>}>,
 *   edges: {from: string, to: string, type: string}[],
 *   deadEnds: string[],
 *   pageRank: Object<string, number>,
 *   cycles: string[][],
 *   rankThreshold: number,
 *   sitemap: {url: *, cluster: string, types: string[]}[],
 *   brokenLinks: {url: string, status: *, finalUrl: string}[]
 * }}
 * @throws when flows.json or selectors.json is missing or is not valid JSON
 */
export function buildGraph() {
  const flows = JSON.parse(fs.readFileSync("./flows.json", "utf8")).flows || [];
  const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));
  const status = readOptionalJson("./status.json") || {};

  const nodes = new Map();
  const edges = [];

  // Registers the node on first sight, tags it with `type`, and returns its
  // id (null when the value normalizes to an empty string).
  function addNode(fullUrl, type = null) {
    const id = normalizeUrl(fullUrl);
    if (!id) return null;

    if (!nodes.has(id)) {
      const label = shortPath(fullUrl);
      nodes.set(id, {
        fullUrl,
        label,
        cluster: clusterName(label),
        types: new Set()
      });
    }
    if (type) nodes.get(id).types.add(type);
    return id;
  }

  // Self-loops and edges with a missing endpoint are dropped.
  function addEdge(a, b, type) {
    if (!a || !b || a === b) return;
    edges.push({ from: a, to: b, type });
  }

  // flows (navigation): consecutive steps become "flow" edges
  for (const flow of flows) {
    const steps = flow.map(step => addNode(step, "flow")).filter(Boolean);
    for (let i = 0; i < steps.length - 1; i++) {
      addEdge(steps[i], steps[i + 1], "flow");
    }
  }

  // selectors (links/forms/UI/sections)
  for (const page of Object.keys(selectors)) {
    const pageId = addNode(page, "page");
    const entry = selectors[page] || {}; // tolerate null entries

    for (const link of entry.links || []) {
      addEdge(pageId, addNode(link.to, "link"), "link");
    }

    for (const form of entry.forms || []) {
      addEdge(pageId, addNode(form.action, "form"), "form");
    }

    for (const ui of entry.ui || []) {
      addEdge(pageId, addNode(`${shortPath(page)}#ui:${ui.friendly}`, "ui"), "ui");
    }

    for (const section of entry.sections || []) {
      addEdge(
        pageId,
        addNode(`${shortPath(page)}#section:${section.friendly}`, "section"),
        "section"
      );
    }
  }

  const nodeIds = [...nodes.keys()];

  // Nodes that never appear as the source of an edge.
  const sources = new Set(edges.map(edge => edge.from));
  const deadEnds = nodeIds.filter(id => !sources.has(id));

  const rank = computePageRank(nodeIds, edges);
  const pageRank = {};
  nodeIds.forEach((id, i) => {
    pageRank[id] = rank[i];
  });

  const rankThreshold =
    [...rank].sort((a, b) => b - a)[Math.floor(nodeIds.length * TOP_RANK_FRACTION)] || 0;

  const adjacency = new Map(nodeIds.map(id => [id, []]));
  for (const edge of edges) adjacency.get(edge.from).push(edge.to);
  const cycles = findCycles(nodeIds, adjacency);

  const sitemap = [...nodes.values()]
    .map(node => ({
      url: node.fullUrl,
      cluster: node.cluster,
      types: [...node.types]
    }))
    .sort((a, b) => a.url.localeCompare(b.url));

  const brokenLinks = Object.entries(status)
    .filter(([, s]) =>
      s != null && (s.status >= 400 || s.status === 0 || s.soft404 === true)
    )
    .map(([url, s]) => ({
      url: normalizeUrl(url),
      status: s.status,
      finalUrl: normalizeUrl(s.finalUrl)
    }));

  return {
    nodes,
    edges,
    deadEnds,
    pageRank,
    cycles,
    rankThreshold,
    sitemap,
    brokenLinks
  };
}
|
|
|
|
|
|
// graph-utils.js
|
|
//@7
|
|
/*
|
|
Consumes : flows.json
|
|
: selectors.json
|
|
: status.json
|
|
Produces : graph object consumed by graph-data.js
|
|
console.log("Loading: graph-utils.js");
|
|
|
|
import fs from "fs";
|
|
|
|
export function buildGraph() {
|
|
const flows = JSON.parse(fs.readFileSync("./flows.json", "utf8")).flows || [];
|
|
const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));
|
|
let status = {};
|
|
try {
|
|
status = JSON.parse(fs.readFileSync("./status.json", "utf8"));
|
|
} catch {}
|
|
|
|
const nodes = new Map();
|
|
const edges = [];
|
|
|
|
const normalize = url =>
|
|
String(url || "")
|
|
.trim()
|
|
.replace(/\/+$/, "")
|
|
.toLowerCase();
|
|
|
|
const lastSegment = url => {
|
|
const parts = url.split("/");
|
|
return parts[parts.length - 1] || url;
|
|
};
|
|
|
|
const clusterName = url => {
|
|
const parts = url.split("/");
|
|
return parts.length > 1 ? parts[1] : "root";
|
|
};
|
|
|
|
function addNode(fullUrl, type = null) {
|
|
const id = normalize(fullUrl);
|
|
if (!id) return null;
|
|
|
|
if (!nodes.has(id)) {
|
|
nodes.set(id, {
|
|
fullUrl,
|
|
label: lastSegment(id),
|
|
cluster: clusterName(id),
|
|
types: new Set()
|
|
});
|
|
}
|
|
if (type) nodes.get(id).types.add(type);
|
|
return id;
|
|
}
|
|
|
|
function addEdge(a, b, type) {
|
|
if (!a || !b || a === b) return;
|
|
edges.push({ from: a, to: b, type });
|
|
}
|
|
|
|
// flows (navigation)
|
|
for (const flow of flows) {
|
|
const steps = flow.map(s => addNode(s, "flow")).filter(Boolean);
|
|
for (let i = 0; i < steps.length - 1; i++) {
|
|
addEdge(steps[i], steps[i + 1], "flow");
|
|
}
|
|
}
|
|
|
|
// selectors (links/forms/UI/sections)
|
|
for (const page of Object.keys(selectors)) {
|
|
const pageId = addNode(page, "page");
|
|
const entry = selectors[page];
|
|
|
|
for (const link of entry.links || []) {
|
|
const to = addNode(link.to, "link");
|
|
addEdge(pageId, to, "link");
|
|
}
|
|
|
|
for (const form of entry.forms || []) {
|
|
const to = addNode(form.action, "form");
|
|
addEdge(pageId, to, "form");
|
|
}
|
|
|
|
for (const ui of entry.ui || []) {
|
|
const uiId = addNode(`${page}#ui:${ui.id}`, "ui");
|
|
addEdge(pageId, uiId, "ui");
|
|
}
|
|
|
|
for (const section of entry.sections || []) {
|
|
const secId = addNode(`${page}#section:${section.id}`, "section");
|
|
addEdge(pageId, secId, "section");
|
|
}
|
|
}
|
|
|
|
const outgoing = new Map();
|
|
for (const e of edges) {
|
|
outgoing.set(e.from, (outgoing.get(e.from) || 0) + 1);
|
|
}
|
|
const deadEnds = [...nodes.keys()].filter(n => !outgoing.has(n));
|
|
|
|
const nodeIds = [...nodes.keys()];
|
|
const N = nodeIds.length;
|
|
const index = new Map(nodeIds.map((id, i) => [id, i]));
|
|
|
|
const incoming = Array.from({ length: N }, () => []);
|
|
const outdeg = new Array(N).fill(0);
|
|
|
|
for (const e of edges) {
|
|
const a = index.get(e.from);
|
|
const b = index.get(e.to);
|
|
if (a == null || b == null) continue;
|
|
outdeg[a]++;
|
|
incoming[b].push(a);
|
|
}
|
|
|
|
const d = 0.85;
|
|
let rank = new Array(N).fill(1 / N);
|
|
|
|
for (let iter = 0; iter < 20; iter++) {
|
|
const next = new Array(N).fill((1 - d) / N);
|
|
for (let i = 0; i < N; i++) {
|
|
for (const j of incoming[i]) {
|
|
if (outdeg[j] > 0) next[i] += d * (rank[j] / outdeg[j]);
|
|
}
|
|
}
|
|
rank = next;
|
|
}
|
|
|
|
const pageRank = {};
|
|
for (let i = 0; i < N; i++) {
|
|
pageRank[nodeIds[i]] = rank[i];
|
|
}
|
|
|
|
const rankThreshold =
|
|
[...rank].sort((a, b) => b - a)[Math.floor(N * 0.1)] || 0;
|
|
|
|
const adj = new Map();
|
|
for (const id of nodeIds) adj.set(id, []);
|
|
for (const e of edges) adj.get(e.from).push(e.to);
|
|
|
|
const visited = new Set();
|
|
const stack = new Set();
|
|
const cycles = [];
|
|
|
|
function dfs(node, path) {
|
|
if (stack.has(node)) {
|
|
const idx = path.indexOf(node);
|
|
if (idx !== -1) cycles.push(path.slice(idx));
|
|
return;
|
|
}
|
|
if (visited.has(node)) return;
|
|
|
|
visited.add(node);
|
|
stack.add(node);
|
|
path.push(node);
|
|
|
|
for (const next of adj.get(node)) {
|
|
dfs(next, path);
|
|
if (cycles.length > 50) break;
|
|
}
|
|
|
|
path.pop();
|
|
stack.delete(node);
|
|
}
|
|
|
|
for (const id of nodeIds) {
|
|
if (!visited.has(id)) dfs(id, []);
|
|
if (cycles.length > 50) break;
|
|
}
|
|
|
|
const sitemap = [...nodes.values()]
|
|
.map(n => ({
|
|
url: n.fullUrl,
|
|
cluster: n.cluster,
|
|
types: [...n.types]
|
|
}))
|
|
.sort((a, b) => a.url.localeCompare(b.url));
|
|
|
|
const brokenLinks = Object.entries(status)
|
|
.filter(([url, s]) =>
|
|
s.status >= 400 ||
|
|
s.status === 0 ||
|
|
s.soft404 === true
|
|
)
|
|
.map(([url, s]) => ({
|
|
url: normalize(url),
|
|
status: s.status,
|
|
finalUrl: normalize(s.finalUrl)
|
|
}));
|
|
|
|
return {
|
|
nodes,
|
|
edges,
|
|
deadEnds,
|
|
pageRank,
|
|
cycles,
|
|
rankThreshold,
|
|
sitemap,
|
|
brokenLinks
|
|
};
|
|
}
|
|
|
|
*/
|