Uploaded Entire Project

This commit is contained in:
2026-03-30 01:32:43 +01:00
commit 2532dd3057
20 changed files with 2433 additions and 0 deletions

3
action-tests.json Normal file
View File

@@ -0,0 +1,3 @@
{
"actionTests": []
}

66
cli.js Normal file
View File

@@ -0,0 +1,66 @@
//@1
console.log("Loading: cli.js");
/*
Consumes : `login-config.json`, e.g.
"startURL": "https://examples.eze2e.com/login/cookie/login.php",
"domain" : "https://examples.eze2e.com",
*/
/* Command Line Commands..
Step 1:
node cli.js
Step 2:
node generate-graph-data.js
>Loading: graph-utils.js
>graph-data.js written
Step 3:
open graph.html
*/
import fs from "fs";
import { execSync } from "child_process";
/*
`Runs` these..
"crawler-flow.js" , "Step 1: Crawling flows"
"selectors-crawler.js" , "Step 2: Crawling selectors"
"test-generator.js" , "Step 3: Generating tests"
"test-executor.js" , "Step 4: Running tests"
*/
// Playwright timeouts in milliseconds. Each one is taken from its environment
// variable and falls back to 5000 when that variable is unset, not numeric, or 0.
const timeoutFromEnv = variableName => Number(process.env[variableName]) || 5000;
const ACTION_TIMEOUT = timeoutFromEnv("PW_ACTION_TIMEOUT");
const NAV_TIMEOUT = timeoutFromEnv("PW_NAV_TIMEOUT");
console.log(
  `Using timeouts: ACTION_TIMEOUT=${ACTION_TIMEOUT}ms, NAV_TIMEOUT=${NAV_TIMEOUT}ms`
);
/**
 * Run one pipeline script to completion in a child `node` process.
 *
 * The child shares this process's stdio and environment, with the two
 * timeout variables overridden by the values resolved in this file.
 * `execSync` throws if the child exits with a non-zero status, which
 * stops the pipeline at the failing step.
 *
 * @param {string} script - Script file passed to `node`.
 * @param {string} label - Human-readable step name printed in the banner.
 */
function run(script, label) {
  const childEnv = {
    ...process.env,
    PW_ACTION_TIMEOUT: ACTION_TIMEOUT,
    PW_NAV_TIMEOUT: NAV_TIMEOUT
  };
  const command = `node ${script}`;

  console.log(`\n=== ${label} (${script}) ===`);
  execSync(command, { stdio: "inherit", env: childEnv });
}
// Positional CLI arguments. They are read here but nothing below uses them.
const [, , mode, outFile] = process.argv;

// Pipeline steps, executed strictly in this order; each entry is [script, banner label].
const pipelineSteps = [
  ["crawler-flow.js", "Step 1: Crawling flows"],
  ["selectors-crawler.js", "Step 2: Crawling selectors"],
  ["test-generator.js", "Step 3: Generating tests"],
  ["test-executor.js", "Step 4: Running tests"]
];

console.log("\n=== Starting full pipeline ===");
for (const [script, label] of pipelineSteps) {
  run(script, label);
}

// Closing hints for the manual graph-generation steps.
const closingLines = [
  "NEXT STEPS..",
  "node generate-graph-data.js (to generate graph data)",
  "open graph.html (to view graph , can view in browser from file explorer too)",
  "\n=== Pipeline complete ==="
];
for (const line of closingLines) {
  console.log(line);
}

476
crawler-flow.js Normal file
View File

@@ -0,0 +1,476 @@
// crawler-flow.js
//@2
/*
Writes to : flows.json
: status.json
Consumed by : graph-utils.js
Short friendly labels enabled
*/
console.log("Loading: crawler-flow.js");
import fs from "fs";
import { chromium } from "playwright";
import { normalizeUrl } from "./utils/normalizeUrl.js";
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
// Crawl settings come from login-config.json; the pattern lists and matcher
// options default to "no restriction" when the file omits them.
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
const {
  startURL,
  maxDepth,
  loginConfig,
  includePatterns = [],
  excludePatterns = [],
  matcherOptions = {}
} = config;

// selectors.json is written by selectors-crawler.js, which cli.js runs AFTER
// this script. On a first run the file does not exist yet, so fall back to an
// empty map instead of failing with ENOENT.
const selectors = fs.existsSync("./selectors.json")
  ? JSON.parse(fs.readFileSync("./selectors.json", "utf8"))
  : {};

// Crawl state shared by every function in this file.
const visited = new Set(); // normalized URLs that crawl() has already accepted
const flows = []; // one URL path (array) per visited page; written to flows.json
const statusMap = {}; // url -> { status, finalUrl, soft404 }; written to status.json
const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);
// Substrings that mark an element as a logout control. The spaced variants
// cover visible labels such as "Log out" / "Sign Out", which the compact and
// hyphenated forms alone do not match.
const LOGOUT_KEYWORDS = [
  "logout",
  "signout",
  "logoff",
  "sign-out",
  "log-out",
  "log out",
  "sign out",
  "log off"
];

/**
 * Tell whether a clickable element looks like a logout control, so the
 * crawler can skip it and keep its session.
 *
 * @param {{text?: string, href?: string|null, onclick?: string|null}} info
 *   Element description as produced by getClickableElements().
 * @returns {boolean} true when the text, href, or onclick attribute contains
 *   one of LOGOUT_KEYWORDS (case-insensitive, whitespace runs collapsed).
 */
function isLogoutElement(info) {
  // innerText may contain line breaks or repeated spaces ("Log\n out"), so
  // collapse every whitespace run to a single space before matching.
  const haystacks = [info.text, info.href, info.onclick].map(value =>
    String(value || "")
      .toLowerCase()
      .replace(/\s+/g, " ")
  );
  return LOGOUT_KEYWORDS.some(keyword =>
    haystacks.some(haystack => haystack.includes(keyword))
  );
}
/**
 * Produce a friendly, slug-style label for a UI or section node.
 *
 * Candidates are tried in this order and the first usable one wins:
 *   1. the element text,
 *   2. `tab-…`, `modal-…`, or `accordion-…` from the matching data attribute,
 *   3. the ARIA role,
 *   4. `<tag>-<index>` as the last resort.
 *
 * @param {string} text - Visible text of the element.
 * @param {string|null} role - Value of the `role` attribute.
 * @param {object|null|undefined} dataset - Copy of the element's data attributes.
 * @param {string} tag - Lower-case tag name.
 * @param {number} index - Position of the element in the query result.
 * @returns {string} The label.
 */
function friendlyLabel(text, role, dataset, tag, index) {
  // Trim, turn whitespace runs into "-", drop everything except letters,
  // digits, "-" and "_", then lower-case.
  const slugify = value => {
    const dashed = String(value || "").trim().replace(/\s+/g, "-");
    return dashed.replace(/[^a-zA-Z0-9-_]/g, "").toLowerCase();
  };

  const fromText = text ? slugify(text) : "";
  if (fromText) {
    return fromText;
  }

  for (const key of ["tab", "modal", "accordion"]) {
    if (dataset?.[key]) {
      return `${key}-${slugify(dataset[key])}`;
    }
  }

  const fromRole = role ? slugify(role) : "";
  return fromRole || `${tag}-${index}`;
}
/**
 * Collect a description of every element on the page that matches the
 * clickable selector below (buttons, onclick handlers, role=button,
 * data-toggle / data-tab / data-accordion, ...).
 *
 * @param {object} page - Playwright page to query.
 * @returns {Promise<object[]>} One entry per match with `text`, `href`,
 *   `onclick`, `tag`, `role`, `dataset`, `index`, `friendly` (label from
 *   friendlyLabel) and `xpath` (absolute, position-based path to the element).
 */
async function getClickableElements(page) {
  const clickableSelector = `
button,
[onclick],
a[onclick]:not([href]),
img[onclick],
img[role='button'],
img[tabindex],
div[onclick],
span[onclick],
[role='button'],
[data-toggle],
[data-tab],
[data-accordion]
`;

  // The callback is executed inside the browser, so it must stay
  // self-contained: it cannot reference anything declared in this file.
  const found = await page.$$eval(clickableSelector, elements => {
    // Build "/TAG[n]/TAG[n]/..." by walking up to the root; n is the 1-based
    // position among preceding siblings with the same tag name.
    const absoluteXPath = element => {
      const segments = [];
      for (let node = element; node && node.nodeType === 1; node = node.parentElement) {
        let position = 1;
        for (let prev = node.previousElementSibling; prev; prev = prev.previousElementSibling) {
          if (prev.tagName === node.tagName) position++;
        }
        segments.unshift(`/${node.tagName}[${position}]`);
      }
      return segments.join("");
    };

    return elements.map((el, index) => ({
      text: el.innerText || "",
      href: el.getAttribute("href"),
      onclick: el.getAttribute("onclick"),
      tag: el.tagName.toLowerCase(),
      role: el.getAttribute("role"),
      dataset: { ...el.dataset },
      index,
      friendly: null,
      xpath: absoluteXPath(el)
    }));
  });

  // Labels are computed on the Node side because friendlyLabel is not
  // available inside the browser callback.
  found.forEach(entry => {
    entry.friendly = friendlyLabel(
      entry.text,
      entry.role,
      entry.dataset,
      entry.tag,
      entry.index
    );
  });
  return found;
}
/**
 * Scroll the page from top to bottom in steps, pausing 150 ms after each
 * step, then jump to the full scroll height.
 *
 * The step is one fifth of the scroll height but never less than 200 px.
 *
 * @param {object} page - Playwright page to scroll.
 * @returns {Promise<void>}
 */
async function scrollToReveal(page) {
  // Everything inside this callback runs in the browser context.
  await page.evaluate(async () => {
    const pause = ms => new Promise(resolve => setTimeout(resolve, ms));

    const pageHeight =
      document.body.scrollHeight || document.documentElement.scrollHeight || 0;
    const stride = Math.max(200, Math.floor(pageHeight / 5));

    let offset = 0;
    while (offset < pageHeight) {
      window.scrollTo(0, offset);
      await pause(150);
      offset += stride;
    }
    window.scrollTo(0, pageHeight);
  });
}
/**
 * Click every non-logout clickable on the current page and crawl any link
 * that only appears after the click.
 *
 * crawl() navigates the same shared `page`, so after each clickable the page
 * is brought back to where the clickables were collected. Otherwise the
 * remaining XPaths would be evaluated against an unrelated document.
 *
 * @param {object} page - Playwright page, already showing the page to explore.
 * @param {string} url - URL being explored (used in log messages).
 * @param {number} depth - Depth of `url`; discovered links are crawled at depth + 1.
 * @param {string[]} path - Flow path leading to and including `url`.
 * @returns {Promise<void>}
 */
async function discoverByClicking(page, url, depth, path) {
  await scrollToReveal(page);
  const clickables = await getClickableElements(page);

  // Address the clickables were collected from. This can differ from `url`
  // when the page redirected (for example after a login form was submitted).
  const originUrl = page.url();

  for (const info of clickables) {
    if (isLogoutElement(info)) continue;

    try {
      const beforeLinks = new Set(
        await page.$$eval("a[href]", as => as.map(a => a.href))
      );

      // Runs in the browser: resolve the stored XPath and click the element.
      await page.evaluate(xpath => {
        const node = document.evaluate(
          xpath,
          document,
          null,
          XPathResult.FIRST_ORDERED_NODE_TYPE,
          null
        ).singleNodeValue;
        if (node) node.click();
      }, info.xpath);

      // Give click handlers a moment to update the DOM.
      await page.waitForTimeout(400);

      const afterLinks = await page.$$eval("a[href]", as =>
        as.map(a => a.href)
      );
      for (const link of afterLinks) {
        if (beforeLinks.has(link)) continue;
        const normalized = normalizeUrl(link);
        if (normalized) {
          await crawl(page, normalized, depth + 1, path);
        }
      }
    } catch (err) {
      // Best effort: one failing element must not stop the exploration, but
      // the failure should be visible instead of silently swallowed.
      console.warn(
        `discoverByClicking: skipped ${info.xpath} on ${url}: ${err?.message ?? err}`
      );
    }

    // The click itself or a recursive crawl may have navigated away.
    if (page.url() !== originUrl) {
      try {
        await page.goto(originUrl, { waitUntil: "networkidle" });
      } catch (err) {
        console.warn(
          `discoverByClicking: could not return to ${originUrl}: ${err?.message ?? err}`
        );
      }
    }
  }
}
/**
 * Fill in and submit the login form for every login rule whose `match`
 * substring occurs in `url`.
 *
 * A configuration without `loginConfig`, without `logins`, or with a rule
 * that has no `fields` is treated as "nothing to fill" instead of crashing,
 * so sites that need no login can be crawled with the same script.
 *
 * @param {object} page - Playwright page currently showing `url`.
 * @param {string} url - URL that was just loaded.
 * @returns {Promise<void>}
 */
async function performLoginIfNeeded(page, url) {
  const rules = loginConfig?.logins ?? [];

  for (const rule of rules) {
    if (!url.includes(rule.match)) continue;

    // Each key of `fields` is an input's name attribute; the value is typed into it.
    for (const [name, value] of Object.entries(rule.fields ?? {})) {
      const sel = `input[name="${name}"]`;
      await page.waitForSelector(sel);
      await page.fill(sel, value);
    }

    const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
    await page.waitForSelector(submitSel);
    await page.click(submitSel);
    // Wait for the post-login navigation to settle before the caller continues.
    await page.waitForLoadState("networkidle");
  }
}
/**
 * Visit one URL with the shared page, record its HTTP status and flow path,
 * then recurse into everything reachable from it: links revealed by
 * clicking, plain anchors, and the links/forms listed for it in selectors.json.
 *
 * The URL is skipped when it cannot be normalized, is deeper than `maxDepth`,
 * is rejected by the pattern matcher, or was already visited.
 *
 * @param {object} page - Playwright page reused for the whole crawl.
 * @param {string} rawUrl - URL to visit (normalized internally).
 * @param {number} depth - Distance from the start URL.
 * @param {string[]} path - URLs followed to get here (excluding this one).
 * @returns {Promise<void>}
 */
async function crawl(page, rawUrl, depth, path) {
  const url = normalizeUrl(rawUrl);
  // Every other call site treats a falsy normalizeUrl() result as
  // "not crawlable"; apply the same rule to the entry point.
  if (!url) return;
  if (depth > maxDepth) return;
  if (!matcher.allow(url)) return;
  if (visited.has(url)) return;
  visited.add(url);

  let response;
  try {
    response = await page.goto(url, { waitUntil: "networkidle" });
  } catch (err) {
    // A failed navigation (timeout, DNS, refused connection) must not abort
    // the whole crawl. Record it with status 0, which graph-utils.js reports
    // as a broken link, and move on.
    console.warn(`crawl: navigation to ${url} failed: ${err?.message ?? err}`);
    statusMap[url] = { status: 0, finalUrl: url, soft404: false };
    return;
  }

  const status = response?.status() || 0;
  const finalUrl = response?.url() || url;
  const title = await page.title();
  // Pages that answer 200 but whose title mentions 404 count as "soft" 404s.
  const soft404 = title.includes("404");
  statusMap[url] = { status, finalUrl, soft404 };

  await performLoginIfNeeded(page, url);

  const newPath = [...path, url];
  flows.push(newPath);

  await discoverByClicking(page, url, depth, newPath);

  // Plain anchors; only absolute http(s) targets are followed.
  const links = await page.$$eval("a[href]", as =>
    as.map(a => a.href).filter(h => h.startsWith("http"))
  );
  const normalizedLinks = links
    .map(h => normalizeUrl(h))
    .filter(h => h);
  for (const link of normalizedLinks) {
    await crawl(page, link, depth + 1, newPath);
  }

  // Targets already known for this page from selectors.json.
  if (selectors[url]) {
    const entry = selectors[url];
    for (const link of entry.links || []) {
      const target = normalizeUrl(link.to);
      if (target) await crawl(page, target, depth + 1, newPath);
    }
    for (const form of entry.forms || []) {
      const target = normalizeUrl(form.action);
      if (target) await crawl(page, target, depth + 1, newPath);
    }
  }
}
// Entry point: crawl from startURL, then write flows.json and status.json.
(async () => {
  const browser = await chromium.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await crawl(page, startURL, 0, []);
    fs.writeFileSync("flows.json", JSON.stringify({ flows }, null, 2));
    fs.writeFileSync("status.json", JSON.stringify(statusMap, null, 2));
  } finally {
    // Close the browser even when the crawl or the file writes fail.
    await browser.close();
  }
})().catch(err => {
  // Report the failure and exit non-zero so cli.js stops the pipeline.
  console.error("crawler-flow.js failed:", err);
  process.exitCode = 1;
});
// crawler-flow.js
//@2
/*
Writes to : flows.json
: status.json
Consumed by : graph-utils.js
console.log("Loading: crawler-flow.js");
import fs from "fs";
import { chromium } from "playwright";
import { normalizeUrl } from "./utils/normalizeUrl.js";
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
const {
startURL,
maxDepth,
loginConfig,
includePatterns = [],
excludePatterns = [],
matcherOptions = {}
} = config;
const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));
const visited = new Set();
const flows = [];
const statusMap = {};
const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);
const LOGOUT_KEYWORDS = ["logout", "signout", "logoff", "sign-out", "log-out"];
function isLogoutElement(info) {
const text = (info.text || "").toLowerCase();
const href = (info.href || "").toLowerCase();
const onclick = (info.onclick || "").toLowerCase();
return LOGOUT_KEYWORDS.some(k =>
text.includes(k) || href.includes(k) || onclick.includes(k)
);
}
async function getClickableElements(page) {
return await page.$$eval(
`
button,
[onclick],
a[onclick]:not([href]),
img[onclick],
img[role='button'],
img[tabindex],
div[onclick],
span[onclick],
[role='button'],
[data-toggle],
[data-tab],
[data-accordion]
`,
els =>
els.map(el => ({
text: el.innerText || "",
href: el.getAttribute("href"),
onclick: el.getAttribute("onclick"),
xpath: (() => {
let path = "";
let current = el;
while (current && current.nodeType === 1) {
let index = 1;
let sibling = current.previousElementSibling;
while (sibling) {
if (sibling.tagName === current.tagName) index++;
sibling = sibling.previousElementSibling;
}
path = `/${current.tagName}[${index}]` + path;
current = current.parentElement;
}
return path;
})()
}))
);
}
async function scrollToReveal(page) {
await page.evaluate(async () => {
const total =
document.body.scrollHeight || document.documentElement.scrollHeight || 0;
const step = Math.max(200, Math.floor(total / 5));
for (let y = 0; y < total; y += step) {
window.scrollTo(0, y);
await new Promise(r => setTimeout(r, 150));
}
window.scrollTo(0, total);
});
}
async function discoverByClicking(page, url, depth, path) {
await scrollToReveal(page);
const clickables = await getClickableElements(page);
for (const info of clickables) {
if (isLogoutElement(info)) continue;
try {
const beforeLinks = new Set(
await page.$$eval("a[href]", as => as.map(a => a.href))
);
await page.evaluate(xpath => {
const getNode = xp => {
const result = document.evaluate(
xp,
document,
null,
XPathResult.FIRST_ORDERED_NODE_TYPE,
null
);
return result.singleNodeValue;
};
const el = getNode(xpath);
if (el) el.click();
}, info.xpath);
await page.waitForTimeout(400);
const afterLinks = await page.$$eval("a[href]", as =>
as.map(a => a.href)
);
for (const link of afterLinks) {
if (!beforeLinks.has(link)) {
const normalized = normalizeUrl(link);
if (normalized) {
await crawl(page, normalized, depth + 1, path);
}
}
}
} catch {}
}
}
async function performLoginIfNeeded(page, url) {
for (const rule of loginConfig.logins) {
if (url.includes(rule.match)) {
for (const [name, value] of Object.entries(rule.fields)) {
const sel = `input[name="${name}"]`;
await page.waitForSelector(sel);
await page.fill(sel, value);
}
const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
await page.waitForSelector(submitSel);
await page.click(submitSel);
await page.waitForLoadState("networkidle");
}
}
}
async function crawl(page, rawUrl, depth, path) {
const url = normalizeUrl(rawUrl);
if (depth > maxDepth) return;
if (!matcher.allow(url)) return;
if (visited.has(url)) return;
visited.add(url);
const response = await page.goto(url, { waitUntil: "networkidle" });
const status = response?.status() || 0;
const finalUrl = response?.url() || url;
const title = await page.title();
const soft404 = title.includes("404");
statusMap[url] = { status, finalUrl, soft404 };
await performLoginIfNeeded(page, url);
const newPath = [...path, url];
flows.push(newPath);
await discoverByClicking(page, url, depth, newPath);
const links = await page.$$eval("a[href]", as =>
as.map(a => a.href).filter(h => h.startsWith("http"))
);
const normalizedLinks = links
.map(h => normalizeUrl(h))
.filter(h => h);
for (const link of normalizedLinks) {
await crawl(page, link, depth + 1, newPath);
}
if (selectors[url]) {
const entry = selectors[url];
for (const link of entry.links || []) {
const target = normalizeUrl(link.to);
if (target) await crawl(page, target, depth + 1, newPath);
}
for (const form of entry.forms || []) {
const target = normalizeUrl(form.action);
if (target) await crawl(page, target, depth + 1, newPath);
}
}
}
(async () => {
const browser = await chromium.launch({ headless: false });
const page = await browser.newPage();
await crawl(page, startURL, 0, []);
fs.writeFileSync("flows.json", JSON.stringify({ flows }, null, 2));
fs.writeFileSync("status.json", JSON.stringify(statusMap, null, 2));
await browser.close();
})();
*/

22
flows.json Normal file
View File

@@ -0,0 +1,22 @@
{
"flows": [
[
"https://examples.eze2e.com/login/cookie/login.php"
],
[
"https://examples.eze2e.com/login/cookie/login.php",
"https://examples.eze2e.com/login/cookie/index.php"
],
[
"https://examples.eze2e.com/login/cookie/login.php",
"https://examples.eze2e.com/login/cookie/index.php",
"https://examples.eze2e.com/login/cookie/weather.php"
],
[
"https://examples.eze2e.com/login/cookie/login.php",
"https://examples.eze2e.com/login/cookie/index.php",
"https://examples.eze2e.com/login/cookie/weather.php",
"https://examples.eze2e.com/login/cookie/logout.php"
]
]
}

77
generate-graph-data.js Normal file
View File

@@ -0,0 +1,77 @@
/*
// generate-graph-data.js
import fs from "fs";
import { buildGraphImpl } from "./methods/buildGraph.js";
import { analyzeGraph } from "./methods/analyzeGraph.js";
const flows = JSON.parse(fs.readFileSync("flows.json", "utf8"));
const statusMap = JSON.parse(fs.readFileSync("status.json", "utf8"));
const graph = buildGraphImpl(flows, statusMap);
const analysis = analyzeGraph(graph);
fs.writeFileSync(
"graph-data.js",
`const graphData = ${JSON.stringify(analysis, null, 2)};`
);
console.log("graph-data.js written");
*/
//@6
/*
Write out to: `graph-data.js`
Consumed by : `graph.html`
*/
import fs from "fs";
import { buildGraph } from "./graph-utils.js";
// Graph object from graph-utils.js: node map, edge list, and analysis results.
const g = buildGraph();

// Node colour by type: a "form" target wins over a "link" target; every
// other node gets the default blue.
const colorFor = types => {
  if (types.has("form")) return "#cc5500";
  if (types.has("link")) return "#00aa44";
  return "#0077cc";
};

// Flatten the node map into plain objects (the type Set becomes an array).
const nodes = Array.from(g.nodes, ([id, info]) => ({
  id,
  label: info.label,
  baseColor: colorFor(info.types),
  fullUrl: info.fullUrl,
  cluster: info.cluster,
  types: [...info.types]
}));

// Give every edge a sequential id: e0, e1, ...
const edges = g.edges.map(({ from, to, type }, position) => ({
  id: `e${position}`,
  from,
  to,
  type
}));

const pretty = value => JSON.stringify(value, null, 2);

// ----------------------------------------------------
// Generated script: declares the globals that graph.html reads,
// including BROKEN_LINKS.
// ----------------------------------------------------
const out = `
const NODES = ${pretty(nodes)};
const EDGES = ${pretty(edges)};
const DEAD_ENDS = ${pretty(g.deadEnds)};
const PAGE_RANK = ${pretty(g.pageRank)};
const CYCLES = ${pretty(g.cycles)};
const CYCLES_SET = new Set(CYCLES.flat());
const TOP_RANK_THRESHOLD = ${g.rankThreshold};
const BROKEN_LINKS = ${pretty(g.brokenLinks)};
const NODE_INFO = {};
NODES.forEach(n => NODE_INFO[n.id] = n);
`;
fs.writeFileSync("graph-data.js", out);

for (const line of [
  "graph-data.js written",
  "",
  "NEXT STEPS..",
  "open graph.html (to view graph , can view in browser from file explorer too)"
]) {
  console.log(line);
}

198
graph-data.js Normal file
View File

@@ -0,0 +1,198 @@
const NODES = [
{
"id": "https://examples.eze2e.com/login/cookie/login.php",
"label": "/login/cookie/login.php",
"baseColor": "#cc5500",
"fullUrl": "https://examples.eze2e.com/login/cookie/login.php",
"cluster": "login",
"types": [
"flow",
"page",
"form"
]
},
{
"id": "https://examples.eze2e.com/login/cookie/index.php",
"label": "/login/cookie/index.php",
"baseColor": "#00aa44",
"fullUrl": "https://examples.eze2e.com/login/cookie/index.php",
"cluster": "login",
"types": [
"flow",
"page",
"link"
]
},
{
"id": "https://examples.eze2e.com/login/cookie/weather.php",
"label": "/login/cookie/weather.php",
"baseColor": "#00aa44",
"fullUrl": "https://examples.eze2e.com/login/cookie/weather.php",
"cluster": "login",
"types": [
"flow",
"link",
"page"
]
},
{
"id": "https://examples.eze2e.com/login/cookie/logout.php",
"label": "/login/cookie/logout.php",
"baseColor": "#00aa44",
"fullUrl": "https://examples.eze2e.com/login/cookie/logout.php",
"cluster": "login",
"types": [
"flow",
"link",
"page"
]
},
{
"id": "https://examples.eze2e.com/login/cookie/index.php#",
"label": "/login/cookie/index.php",
"baseColor": "#00aa44",
"fullUrl": "https://examples.eze2e.com/login/cookie/index.php#",
"cluster": "login",
"types": [
"link"
]
},
{
"id": "https://examples.eze2e.com/login/cookie/weather.php#",
"label": "/login/cookie/weather.php",
"baseColor": "#00aa44",
"fullUrl": "https://examples.eze2e.com/login/cookie/weather.php#",
"cluster": "login",
"types": [
"link"
]
}
];
const EDGES = [
{
"id": "e0",
"from": "https://examples.eze2e.com/login/cookie/login.php",
"to": "https://examples.eze2e.com/login/cookie/index.php",
"type": "flow"
},
{
"id": "e1",
"from": "https://examples.eze2e.com/login/cookie/login.php",
"to": "https://examples.eze2e.com/login/cookie/index.php",
"type": "flow"
},
{
"id": "e2",
"from": "https://examples.eze2e.com/login/cookie/index.php",
"to": "https://examples.eze2e.com/login/cookie/weather.php",
"type": "flow"
},
{
"id": "e3",
"from": "https://examples.eze2e.com/login/cookie/login.php",
"to": "https://examples.eze2e.com/login/cookie/index.php",
"type": "flow"
},
{
"id": "e4",
"from": "https://examples.eze2e.com/login/cookie/index.php",
"to": "https://examples.eze2e.com/login/cookie/weather.php",
"type": "flow"
},
{
"id": "e5",
"from": "https://examples.eze2e.com/login/cookie/weather.php",
"to": "https://examples.eze2e.com/login/cookie/logout.php",
"type": "flow"
},
{
"id": "e6",
"from": "https://examples.eze2e.com/login/cookie/login.php",
"to": "https://examples.eze2e.com/login/cookie/index.php#",
"type": "link"
},
{
"id": "e7",
"from": "https://examples.eze2e.com/login/cookie/login.php",
"to": "https://examples.eze2e.com/login/cookie/weather.php",
"type": "link"
},
{
"id": "e8",
"from": "https://examples.eze2e.com/login/cookie/login.php",
"to": "https://examples.eze2e.com/login/cookie/logout.php",
"type": "link"
},
{
"id": "e9",
"from": "https://examples.eze2e.com/login/cookie/index.php",
"to": "https://examples.eze2e.com/login/cookie/index.php#",
"type": "link"
},
{
"id": "e10",
"from": "https://examples.eze2e.com/login/cookie/index.php",
"to": "https://examples.eze2e.com/login/cookie/weather.php",
"type": "link"
},
{
"id": "e11",
"from": "https://examples.eze2e.com/login/cookie/index.php",
"to": "https://examples.eze2e.com/login/cookie/logout.php",
"type": "link"
},
{
"id": "e12",
"from": "https://examples.eze2e.com/login/cookie/weather.php",
"to": "https://examples.eze2e.com/login/cookie/index.php",
"type": "link"
},
{
"id": "e13",
"from": "https://examples.eze2e.com/login/cookie/weather.php",
"to": "https://examples.eze2e.com/login/cookie/weather.php#",
"type": "link"
},
{
"id": "e14",
"from": "https://examples.eze2e.com/login/cookie/weather.php",
"to": "https://examples.eze2e.com/login/cookie/logout.php",
"type": "link"
},
{
"id": "e15",
"from": "https://examples.eze2e.com/login/cookie/logout.php",
"to": "https://examples.eze2e.com/login/cookie/login.php",
"type": "form"
}
];
const DEAD_ENDS = [
"https://examples.eze2e.com/login/cookie/index.php#",
"https://examples.eze2e.com/login/cookie/weather.php#"
];
const PAGE_RANK = {
"https://examples.eze2e.com/login/cookie/login.php": 0.1007271871669504,
"https://examples.eze2e.com/login/cookie/index.php": 0.08544952643888591,
"https://examples.eze2e.com/login/cookie/weather.php": 0.08287660481566009,
"https://examples.eze2e.com/login/cookie/logout.php": 0.08904887919093986,
"https://examples.eze2e.com/login/cookie/index.php#": 0.05380999851672653,
"https://examples.eze2e.com/login/cookie/weather.php#": 0.04261944033710667
};
const CYCLES = [
[
"https://examples.eze2e.com/login/cookie/login.php",
"https://examples.eze2e.com/login/cookie/index.php",
"https://examples.eze2e.com/login/cookie/weather.php",
"https://examples.eze2e.com/login/cookie/logout.php"
],
[
"https://examples.eze2e.com/login/cookie/index.php",
"https://examples.eze2e.com/login/cookie/weather.php"
]
];
const CYCLES_SET = new Set(CYCLES.flat());
const TOP_RANK_THRESHOLD = 0.1007271871669504;
const BROKEN_LINKS = [];
const NODE_INFO = {};
NODES.forEach(n => NODE_INFO[n.id] = n);

416
graph-utils.js Normal file
View File

@@ -0,0 +1,416 @@
// graph-utils.js
//@7
/*
Consumes : flows.json
: selectors.json
: status.json
Produces : graph object consumed by graph-data.js
Short friendly labels enabled
*/
console.log("Loading: graph-utils.js");
import fs from "fs";
/**
 * Build the site graph from the crawler output files in the current working
 * directory and run the analyses shown in graph.html.
 *
 * Reads flows.json and selectors.json (both required) and status.json
 * (optional). Node ids are normalized URLs: trimmed, trailing slashes
 * removed, lower-cased.
 *
 * @returns {{
 *   nodes: Map<string, {fullUrl: string, label: string, cluster: string, types: Set<string>}>,
 *   edges: {from: string, to: string, type: string}[],
 *   deadEnds: string[],
 *   pageRank: Object<string, number>,
 *   cycles: string[][],
 *   rankThreshold: number,
 *   sitemap: {url: string, cluster: string, types: string[]}[],
 *   brokenLinks: {url: string, status: number, finalUrl: string}[]
 * }}
 */
export function buildGraph() {
  const flows = JSON.parse(fs.readFileSync("./flows.json", "utf8")).flows || [];
  const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));
  let status = {};
  try {
    status = JSON.parse(fs.readFileSync("./status.json", "utf8"));
  } catch {
    // status.json is optional; without it no broken links are reported.
  }

  const nodes = new Map();
  const edges = [];
  // Keys of the edges added so far. flows.json stores every prefix of a path
  // as its own flow, so the same step would otherwise be added once per
  // prefix and inflate out-degrees and PageRank.
  const seenEdges = new Set();

  const normalize = url =>
    String(url || "")
      .trim()
      .replace(/\/+$/, "")
      .toLowerCase();

  // Path part of an absolute URL; anything that is not one is returned as-is.
  const shortPath = url => {
    try {
      return new URL(url).pathname || "/";
    } catch {
      return url;
    }
  };

  // First path segment, or "root" when there is none (including the path "/").
  const clusterName = path => path.split("/")[1] || "root";

  // Register a node (once per normalized id) and tag it with `type`.
  function addNode(fullUrl, type = null) {
    const id = normalize(fullUrl);
    if (!id) return null;
    if (!nodes.has(id)) {
      const label = shortPath(fullUrl);
      nodes.set(id, {
        fullUrl,
        label,
        cluster: clusterName(label),
        types: new Set()
      });
    }
    if (type) nodes.get(id).types.add(type);
    return id;
  }

  // Add a directed edge; self-loops and exact duplicates are ignored.
  function addEdge(a, b, type) {
    if (!a || !b || a === b) return;
    const key = JSON.stringify([a, b, type]);
    if (seenEdges.has(key)) return;
    seenEdges.add(key);
    edges.push({ from: a, to: b, type });
  }

  // flows (navigation)
  for (const flow of flows) {
    const steps = flow.map(s => addNode(s, "flow")).filter(Boolean);
    for (let i = 0; i < steps.length - 1; i++) {
      addEdge(steps[i], steps[i + 1], "flow");
    }
  }

  // selectors (links/forms/UI/sections)
  for (const page of Object.keys(selectors)) {
    const pageId = addNode(page, "page");
    const entry = selectors[page];
    for (const link of entry.links || []) {
      addEdge(pageId, addNode(link.to, "link"), "link");
    }
    for (const form of entry.forms || []) {
      addEdge(pageId, addNode(form.action, "form"), "form");
    }
    for (const ui of entry.ui || []) {
      const uiId = addNode(`${shortPath(page)}#ui:${ui.friendly}`, "ui");
      addEdge(pageId, uiId, "ui");
    }
    for (const section of entry.sections || []) {
      const secId = addNode(`${shortPath(page)}#section:${section.friendly}`, "section");
      addEdge(pageId, secId, "section");
    }
  }

  // Dead ends: nodes without any outgoing edge.
  const sources = new Set(edges.map(e => e.from));
  const deadEnds = [...nodes.keys()].filter(id => !sources.has(id));

  // PageRank: damping 0.85, 20 fixed iterations. Rank held by dead-end nodes
  // is not redistributed, so the scores need not sum to 1.
  const nodeIds = [...nodes.keys()];
  const N = nodeIds.length;
  const index = new Map(nodeIds.map((id, i) => [id, i]));
  const incoming = Array.from({ length: N }, () => []);
  const outdeg = new Array(N).fill(0);
  for (const e of edges) {
    const a = index.get(e.from);
    const b = index.get(e.to);
    if (a == null || b == null) continue;
    outdeg[a]++;
    incoming[b].push(a);
  }
  const d = 0.85;
  let rank = new Array(N).fill(1 / N);
  for (let iter = 0; iter < 20; iter++) {
    const next = new Array(N).fill((1 - d) / N);
    for (let i = 0; i < N; i++) {
      for (const j of incoming[i]) {
        if (outdeg[j] > 0) next[i] += d * (rank[j] / outdeg[j]);
      }
    }
    rank = next;
  }
  const pageRank = {};
  for (let i = 0; i < N; i++) {
    pageRank[nodeIds[i]] = rank[i];
  }
  // Rank found 10% of the way down the descending list (0 for an empty graph).
  const rankThreshold =
    [...rank].sort((a, b) => b - a)[Math.floor(N * 0.1)] || 0;

  // Cycle detection: depth-first search that records the current path slice
  // whenever it reaches a node that is still on the stack. Stops collecting
  // once more than 50 cycles were found.
  const adj = new Map();
  for (const id of nodeIds) adj.set(id, []);
  for (const e of edges) adj.get(e.from).push(e.to);
  const visited2 = new Set();
  const stack = new Set();
  const cycles = [];
  function dfs(node, path) {
    if (stack.has(node)) {
      const idx = path.indexOf(node);
      if (idx !== -1) cycles.push(path.slice(idx));
      return;
    }
    if (visited2.has(node)) return;
    visited2.add(node);
    stack.add(node);
    path.push(node);
    for (const next of adj.get(node)) {
      dfs(next, path);
      if (cycles.length > 50) break;
    }
    path.pop();
    stack.delete(node);
  }
  for (const id of nodeIds) {
    if (!visited2.has(id)) dfs(id, []);
    if (cycles.length > 50) break;
  }

  const sitemap = [...nodes.values()]
    .map(n => ({
      url: n.fullUrl,
      cluster: n.cluster,
      types: [...n.types]
    }))
    .sort((a, b) => a.url.localeCompare(b.url));

  // Broken: HTTP status >= 400, status 0, or a soft 404. URLs are normalized
  // so they can be compared with node ids.
  const brokenLinks = Object.entries(status)
    .filter(([, s]) =>
      s.status >= 400 ||
      s.status === 0 ||
      s.soft404 === true
    )
    .map(([url, s]) => ({
      url: normalize(url),
      status: s.status,
      finalUrl: normalize(s.finalUrl)
    }));

  return {
    nodes,
    edges,
    deadEnds,
    pageRank,
    cycles,
    rankThreshold,
    sitemap,
    brokenLinks
  };
}
// graph-utils.js
//@7
/*
Consumes : flows.json
: selectors.json
: status.json
Produces : graph object consumed by graph-data.js
console.log("Loading: graph-utils.js");
import fs from "fs";
export function buildGraph() {
const flows = JSON.parse(fs.readFileSync("./flows.json", "utf8")).flows || [];
const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));
let status = {};
try {
status = JSON.parse(fs.readFileSync("./status.json", "utf8"));
} catch {}
const nodes = new Map();
const edges = [];
const normalize = url =>
String(url || "")
.trim()
.replace(/\/+$/, "")
.toLowerCase();
const lastSegment = url => {
const parts = url.split("/");
return parts[parts.length - 1] || url;
};
const clusterName = url => {
const parts = url.split("/");
return parts.length > 1 ? parts[1] : "root";
};
function addNode(fullUrl, type = null) {
const id = normalize(fullUrl);
if (!id) return null;
if (!nodes.has(id)) {
nodes.set(id, {
fullUrl,
label: lastSegment(id),
cluster: clusterName(id),
types: new Set()
});
}
if (type) nodes.get(id).types.add(type);
return id;
}
function addEdge(a, b, type) {
if (!a || !b || a === b) return;
edges.push({ from: a, to: b, type });
}
// flows (navigation)
for (const flow of flows) {
const steps = flow.map(s => addNode(s, "flow")).filter(Boolean);
for (let i = 0; i < steps.length - 1; i++) {
addEdge(steps[i], steps[i + 1], "flow");
}
}
// selectors (links/forms/UI/sections)
for (const page of Object.keys(selectors)) {
const pageId = addNode(page, "page");
const entry = selectors[page];
for (const link of entry.links || []) {
const to = addNode(link.to, "link");
addEdge(pageId, to, "link");
}
for (const form of entry.forms || []) {
const to = addNode(form.action, "form");
addEdge(pageId, to, "form");
}
for (const ui of entry.ui || []) {
const uiId = addNode(`${page}#ui:${ui.id}`, "ui");
addEdge(pageId, uiId, "ui");
}
for (const section of entry.sections || []) {
const secId = addNode(`${page}#section:${section.id}`, "section");
addEdge(pageId, secId, "section");
}
}
const outgoing = new Map();
for (const e of edges) {
outgoing.set(e.from, (outgoing.get(e.from) || 0) + 1);
}
const deadEnds = [...nodes.keys()].filter(n => !outgoing.has(n));
const nodeIds = [...nodes.keys()];
const N = nodeIds.length;
const index = new Map(nodeIds.map((id, i) => [id, i]));
const incoming = Array.from({ length: N }, () => []);
const outdeg = new Array(N).fill(0);
for (const e of edges) {
const a = index.get(e.from);
const b = index.get(e.to);
if (a == null || b == null) continue;
outdeg[a]++;
incoming[b].push(a);
}
const d = 0.85;
let rank = new Array(N).fill(1 / N);
for (let iter = 0; iter < 20; iter++) {
const next = new Array(N).fill((1 - d) / N);
for (let i = 0; i < N; i++) {
for (const j of incoming[i]) {
if (outdeg[j] > 0) next[i] += d * (rank[j] / outdeg[j]);
}
}
rank = next;
}
const pageRank = {};
for (let i = 0; i < N; i++) {
pageRank[nodeIds[i]] = rank[i];
}
const rankThreshold =
[...rank].sort((a, b) => b - a)[Math.floor(N * 0.1)] || 0;
const adj = new Map();
for (const id of nodeIds) adj.set(id, []);
for (const e of edges) adj.get(e.from).push(e.to);
const visited = new Set();
const stack = new Set();
const cycles = [];
function dfs(node, path) {
if (stack.has(node)) {
const idx = path.indexOf(node);
if (idx !== -1) cycles.push(path.slice(idx));
return;
}
if (visited.has(node)) return;
visited.add(node);
stack.add(node);
path.push(node);
for (const next of adj.get(node)) {
dfs(next, path);
if (cycles.length > 50) break;
}
path.pop();
stack.delete(node);
}
for (const id of nodeIds) {
if (!visited.has(id)) dfs(id, []);
if (cycles.length > 50) break;
}
const sitemap = [...nodes.values()]
.map(n => ({
url: n.fullUrl,
cluster: n.cluster,
types: [...n.types]
}))
.sort((a, b) => a.url.localeCompare(b.url));
const brokenLinks = Object.entries(status)
.filter(([url, s]) =>
s.status >= 400 ||
s.status === 0 ||
s.soft404 === true
)
.map(([url, s]) => ({
url: normalize(url),
status: s.status,
finalUrl: normalize(s.finalUrl)
}));
return {
nodes,
edges,
deadEnds,
pageRank,
cycles,
rankThreshold,
sitemap,
brokenLinks
};
}
*/

122
graph.html Normal file
View File

@@ -0,0 +1,122 @@
<!-- graph.html -->
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Flow Graph Viewer</title>
<style>
body { margin: 0; font-family: sans-serif; display: flex; height: 100vh; }
#sidebar {
width: 240px; background: #f4f4f4; padding: 12px; overflow-y: auto;
border-right: 1px solid #ccc;
}
#network { flex: 1; }
h3 { margin-top: 0; }
.node-info { font-size: 12px; white-space: pre-wrap; }
</style>
<script src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
<script src="graph-data.js"></script>
</head>
<body>
<div id="sidebar">
<h3>Filters</h3>
<label><input type="checkbox" id="flow" checked> Flow edges</label><br>
<label><input type="checkbox" id="link" checked> Link edges</label><br>
<label><input type="checkbox" id="form" checked> Form edges</label><br>
<label><input type="checkbox" id="dead" checked> Show dead-ends</label><br>
<label><input type="checkbox" id="cycles"> Highlight cycles</label><br>
<label><input type="checkbox" id="rank"> Highlight top-ranked</label><br>
<label><input type="checkbox" id="broken"> Highlight broken links</label><br>
<h3>Node</h3>
<div id="info" class="node-info"></div>
</div>
<div id="network"></div>
<script>
// Element that hosts the vis-network canvas.
const container = document.getElementById("network");

// Live data sets built from the globals declared in graph-data.js;
// applyFilters() updates them in place.
const data = {
  nodes: new vis.DataSet(NODES),
  edges: new vis.DataSet(EDGES)
};

const networkOptions = {
  layout: { improvedLayout: true },
  physics: { stabilization: false }
};
const network = new vis.Network(container, data, networkOptions);
// Re-apply every sidebar checkbox to the graph: hide or show dead-end nodes
// and edge types, and recolour nodes (broken > cycle > top-ranked > base colour).
function applyFilters() {
  const isChecked = id => document.getElementById(id).checked;
  const showFlow = isChecked("flow");
  const showLink = isChecked("link");
  const showForm = isChecked("form");
  const showDead = isChecked("dead");
  const highlightCycles = isChecked("cycles");
  const highlightRank = isChecked("rank");
  const highlightBroken = isChecked("broken");

  // Build the lookup sets once per call instead of scanning DEAD_ENDS and
  // BROKEN_LINKS again for every node.
  const deadEndIds = new Set(DEAD_ENDS);
  // BROKEN_LINKS urls are normalized the same way as node ids, so they can
  // be compared directly.
  const brokenIds = new Set(BROKEN_LINKS.map(b => b.url));

  data.nodes.update(NODES.map(n => {
    const isDead = deadEndIds.has(n.id);
    const inCycle = CYCLES_SET.has(n.id);
    const rank = PAGE_RANK[n.id] || 0;
    const isBroken = brokenIds.has(n.id);
    return {
      id: n.id,
      hidden: !showDead && isDead,
      color:
        highlightBroken && isBroken ? "#ff0000" :
        highlightCycles && inCycle ? "#ff00aa" :
        highlightRank && rank > TOP_RANK_THRESHOLD ? "#ff8800" :
        n.baseColor
    };
  }));

  // Edge types without a checkbox always stay visible.
  data.edges.update(EDGES.map(e => ({
    id: e.id,
    hidden:
      (e.type === "flow" && !showFlow) ||
      (e.type === "link" && !showLink) ||
      (e.type === "form" && !showForm)
  })));
}
// Any sidebar checkbox change re-applies all filters.
for (const checkbox of document.querySelectorAll("#sidebar input")) {
  checkbox.addEventListener("change", applyFilters);
}

// Clicking a node shows its details in the sidebar; clicking empty space clears them.
network.on("click", params => {
  const panel = document.getElementById("info");
  if (params.nodes.length === 0) {
    panel.textContent = "";
    return;
  }

  const [id] = params.nodes;
  const info = NODE_INFO[id];
  // BROKEN_LINKS urls are normalized like node ids, so a direct comparison works.
  const broken = BROKEN_LINKS.find(entry => entry.url === id);
  const brokenText = broken ? broken.status : "no";

  panel.textContent =
`URL: ${info.fullUrl}
Cluster: ${info.cluster}
Types: ${info.types.join(", ")}
PageRank: ${PAGE_RANK[id].toFixed(5)}
Dead-end: ${DEAD_ENDS.includes(id)}
In cycle: ${CYCLES_SET.has(id)}
Broken: ${brokenText}`;
});

// Initial render with the default checkbox states.
applyFilters();
</script>
</body>
</html>

46
login-config.json Normal file
View File

@@ -0,0 +1,46 @@
{
"startURL": "https://examples.eze2e.com/login/cookie/login.php",
"startURL_not_used_3": "https://templates.eze2e.com",
"startURL_not_used_4": "https://templates.eze2e.com/app",
"startURLX": "https://www.eze2e.com",
"includePatterns": [
"https://examples.eze2e.com/login/cookie"
],
"XexcludePatterns": [
"/photo-galleries",
"/wp-content",
"/survivors",
"/members",
"/yourdomain/dataprivacy",
"/contact",
"/forums",
"/product-videos",
"/contact",
"/wp-login",
"/log-in",
"/logout",
"/register-account",
"/admin",
"^https://example\\.com/private"
],
"matcherOptions": {
"logging": true
} ,
"maxDepth": 5,
"loginConfig": {
"logins": [
{
"match": "login.php",
"fields": {
"user": "demo",
"pass": "password"
},
"submit": "login"
}
]
}
}

558
selectors-crawler.js Normal file
View File

@@ -0,0 +1,558 @@
// selectors-crawler.js
//@3
/*
Writes to : selectors.json
Consumed by : graph-utils.js
: test-generator.js
Short friendly labels enabled
*/
console.log("Loading: selectors-crawler.js");
import fs from "fs";
import { chromium } from "playwright";
import { normalizeUrl } from "./utils/normalizeUrl.js";
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
// Crawl settings are read once, synchronously, from login-config.json in the working directory.
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
// Pattern lists and matcher options default to empty when the key is absent.
// Note: the checked-in login-config.json spells its exclude list "XexcludePatterns",
// so excludePatterns falls back to [] with that file.
// startURL, maxDepth and loginConfig have no defaults and are undefined if missing.
const {
startURL,
maxDepth,
loginConfig,
includePatterns = [],
excludePatterns = [],
matcherOptions = {}
} = config;
// Normalized URLs already crawled (crawl() checks and fills this).
const visited = new Set();
// Result object written to selectors.json: normalized URL -> { forms, links, ui, sections }.
const selectors = {};
// Decides which URLs crawl() is allowed to visit.
const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);
// Substrings that mark an element as a logout control (compared lower-case).
const LOGOUT_KEYWORDS = ["logout", "signout", "logoff", "sign-out", "log-out"];

/**
 * Tells whether an element description looks like a logout control.
 *
 * @param {{text?: string, href?: string, onclick?: string}} info element
 *   description; missing or empty fields are treated as "".
 * @returns {boolean} true when the lower-cased text, href or onclick contains
 *   any entry of LOGOUT_KEYWORDS.
 */
function isLogoutElement(info) {
  const haystacks = [info.text, info.href, info.onclick].map(value =>
    (value || "").toLowerCase()
  );
  for (const keyword of LOGOUT_KEYWORDS) {
    for (const haystack of haystacks) {
      if (haystack.includes(keyword)) return true;
    }
  }
  return false;
}
/**
 * Builds a short, human-friendly label for an element.
 *
 * Candidates are tried in order and the first non-empty one wins:
 * the slugified text, `tab-…` / `modal-…` / `accordion-…` from the matching
 * data attribute, the slugified role, and finally `<tag>-<index>`.
 * Slugifying trims, turns whitespace runs into "-", drops every character
 * outside [a-zA-Z0-9-_] and lower-cases the result.
 *
 * @param {string} text     visible text of the element
 * @param {string|null} role ARIA role, if any
 * @param {object} [dataset] copy of the element's data-* attributes
 * @param {string} tag      lower-case tag name
 * @param {number|string} index fallback suffix
 * @returns {string} the label
 */
function friendlyLabel(text, role, dataset, tag, index) {
  const slugify = value => {
    const dashed = String(value || "")
      .trim()
      .replace(/\s+/g, "-");
    return dashed.replace(/[^a-zA-Z0-9-_]/g, "").toLowerCase();
  };

  const textSlug = text ? slugify(text) : "";
  if (textSlug) return textSlug;

  for (const key of ["tab", "modal", "accordion"]) {
    const raw = dataset?.[key];
    if (raw) return `${key}-${slugify(raw)}`;
  }

  const roleSlug = role ? slugify(role) : "";
  if (roleSlug) return roleSlug;

  return `${tag}-${index}`;
}
/**
 * Collects the interactive (non-link) controls of the current page.
 *
 * @param {object} page Playwright page to query.
 * @returns {Promise<object[]>} one record per matched element with id (its
 *   position in the match list), tag, text, role, onclick, href, dataset and a
 *   `friendly` label from friendlyLabel().
 */
async function getUIElements(page) {
  const UI_SELECTOR = `
button,
[onclick],
a[onclick]:not([href]),
img[onclick],
img[role='button'],
img[tabindex],
div[onclick],
span[onclick],
[role='button'],
[data-toggle],
[data-tab],
[data-accordion]
`;
  // Evaluated inside the page, so it must not reference anything from this module.
  const describeAll = nodes =>
    nodes.map((node, position) => ({
      id: position,
      tag: node.tagName.toLowerCase(),
      text: node.innerText || "",
      role: node.getAttribute("role") || null,
      onclick: node.getAttribute("onclick"),
      href: node.getAttribute("href"),
      dataset: { ...node.dataset }
    }));

  const found = await page.$$eval(UI_SELECTOR, describeAll);
  found.forEach(entry => {
    entry.friendly = friendlyLabel(
      entry.text,
      entry.role,
      entry.dataset,
      entry.tag,
      entry.id
    );
  });
  return found;
}
/**
 * Collects the structural regions (sections, dialogs, tab panels, modals, …)
 * of the current page.
 *
 * @param {object} page Playwright page to query.
 * @returns {Promise<object[]>} one record per matched element with id (the
 *   element's own id, or its position in the match list when it has none),
 *   tag, text (first 200 characters), role, dataset and a `friendly` label
 *   from friendlyLabel().
 */
async function getSections(page) {
  const SECTION_SELECTOR = `
section,
[role='dialog'],
[role='tabpanel'],
[role='tablist'],
.modal,
[data-modal],
[data-section],
[data-tab-panel],
[data-accordion-panel]
`;
  // Evaluated inside the page, so it must not reference anything from this module.
  const describeAll = nodes =>
    nodes.map((node, position) => ({
      id: node.id || position,
      tag: node.tagName.toLowerCase(),
      text: (node.innerText || "").slice(0, 200),
      role: node.getAttribute("role") || null,
      dataset: { ...node.dataset }
    }));

  const found = await page.$$eval(SECTION_SELECTOR, describeAll);
  found.forEach(entry => {
    entry.friendly = friendlyLabel(
      entry.text,
      entry.role,
      entry.dataset,
      entry.tag,
      entry.id
    );
  });
  return found;
}
/**
 * Scrolls the page from top to bottom in steps, pausing 150 ms after each step,
 * then jumps to the very bottom.
 *
 * The step is one fifth of the page height but never less than 200px.
 *
 * @param {object} page Playwright page to scroll.
 * @returns {Promise<void>}
 */
async function scrollToReveal(page) {
  // Runs inside the page; must be self-contained.
  const scrollThroughPage = async () => {
    const pause = ms => new Promise(resolve => setTimeout(resolve, ms));
    const height =
      document.body.scrollHeight || document.documentElement.scrollHeight || 0;
    const stride = Math.max(200, Math.floor(height / 5));
    let offset = 0;
    while (offset < height) {
      window.scrollTo(0, offset);
      await pause(150);
      offset += stride;
    }
    window.scrollTo(0, height);
  };
  await page.evaluate(scrollThroughPage);
}
/**
 * Clicks every non-link interactive control on the current page and crawls any
 * links that only appear after a click.
 *
 * Each control is located again by an absolute XPath captured up front. The set
 * of `a[href]` targets is compared before and 400 ms after the click, and
 * anything new is queued. Queued links are crawled only after every control has
 * been probed: crawl() navigates the shared page, so crawling mid-loop would
 * run the remaining XPaths (and bypass the logout filter, which was evaluated
 * for this page's elements) against a different document.
 *
 * Probing and crawling are best-effort: a failure is logged and skipped.
 *
 * @param {object} page  Playwright page, already showing the page to probe.
 * @param {string} url   URL being probed (used in log messages only).
 * @param {number} depth crawl depth of `url`; discovered links use depth + 1.
 * @returns {Promise<void>}
 */
async function discoverByClicking(page, url, depth) {
  await scrollToReveal(page);
  const clickables = await page.$$eval(
    `
button,
[onclick],
a[onclick]:not([href]),
img[onclick],
img[role='button'],
img[tabindex],
div[onclick],
span[onclick],
[role='button'],
[data-toggle],
[data-tab],
[data-accordion]
`,
    els => {
      // Absolute XPath of the form /HTML[1]/BODY[1]/DIV[2]…, indexing each step
      // by its position among same-tag siblings.
      const xpathOf = el => {
        let path = "";
        let current = el;
        while (current && current.nodeType === 1) {
          let idx = 1;
          let sibling = current.previousElementSibling;
          while (sibling) {
            if (sibling.tagName === current.tagName) idx++;
            sibling = sibling.previousElementSibling;
          }
          path = `/${current.tagName}[${idx}]` + path;
          current = current.parentElement;
        }
        return path;
      };
      return els.map((el, index) => ({
        text: el.innerText || "",
        href: el.getAttribute("href"),
        onclick: el.getAttribute("onclick"),
        tag: el.tagName.toLowerCase(),
        role: el.getAttribute("role"),
        dataset: { ...el.dataset },
        index,
        xpath: xpathOf(el)
      }));
    }
  );

  // Normalized links revealed by clicks, in discovery order, without duplicates.
  const discovered = new Set();

  for (const info of clickables) {
    // Never click anything that would end the session.
    if (isLogoutElement(info)) continue;
    try {
      const beforeLinks = new Set(
        await page.$$eval("a[href]", as => as.map(a => a.href))
      );
      await page.evaluate(xpath => {
        const result = document.evaluate(
          xpath,
          document,
          null,
          XPathResult.FIRST_ORDERED_NODE_TYPE,
          null
        );
        const el = result.singleNodeValue;
        if (el) el.click();
      }, info.xpath);
      // Give click handlers time to update the DOM.
      await page.waitForTimeout(400);
      const afterLinks = await page.$$eval("a[href]", as =>
        as.map(a => a.href)
      );
      for (const link of afterLinks) {
        if (beforeLinks.has(link)) continue;
        const normalized = normalizeUrl(link);
        if (normalized) discovered.add(normalized);
      }
    } catch (err) {
      console.warn(
        `discoverByClicking: probing ${info.xpath} on ${url} failed: ${err.message}`
      );
    }
  }

  for (const link of discovered) {
    try {
      await crawl(page, link, depth + 1);
    } catch (err) {
      console.warn(`discoverByClicking: crawling ${link} failed: ${err.message}`);
    }
  }
}
/**
 * Fills in and submits the login form when `url` matches a configured rule.
 *
 * For every rule in loginConfig.logins whose `match` string occurs in `url`,
 * each entry of `fields` is typed into `input[name="<key>"]`, the control named
 * `submit` is clicked, and the function waits for the network to go idle.
 * Does nothing when the configuration has no login rules.
 *
 * @param {object} page Playwright page already showing `url`.
 * @param {string} url  URL that was just loaded.
 * @returns {Promise<void>}
 */
async function performLoginIfNeeded(page, url) {
  // loginConfig is optional in login-config.json; treat a missing section as "no rules".
  const rules = loginConfig?.logins ?? [];
  for (const rule of rules) {
    if (!url.includes(rule.match)) continue;
    for (const [name, value] of Object.entries(rule.fields)) {
      const sel = `input[name="${name}"]`;
      await page.waitForSelector(sel);
      await page.fill(sel, value);
    }
    const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
    await page.waitForSelector(submitSel);
    await page.click(submitSel);
    await page.waitForLoadState("networkidle");
  }
}
/**
 * Visits one URL, records what is on the page, then recurses into its links.
 *
 * A URL is skipped when it cannot be normalized, is deeper than maxDepth, is
 * rejected by the pattern matcher, or was already visited. For a visited page
 * the forms, absolute http(s) links, UI controls and sections are stored in
 * `selectors` under the normalized requested URL (whatever page the browser is
 * on after a login submit). Click-revealed links are crawled first, then the
 * static links, each at depth + 1.
 *
 * @param {object} page   shared Playwright page; this function navigates it.
 * @param {string} rawUrl URL to visit.
 * @param {number} depth  distance from the start URL.
 * @returns {Promise<void>}
 */
async function crawl(page, rawUrl, depth) {
  const url = normalizeUrl(rawUrl);
  // normalizeUrl returns null for unusable input; the matcher would throw on it.
  if (!url) return;
  if (depth > maxDepth) return;
  if (!matcher.allow(url)) return;
  if (visited.has(url)) return;
  visited.add(url);

  try {
    await page.goto(url, { waitUntil: "networkidle" });
  } catch (err) {
    // One unreachable page must not abort the crawl and lose what was collected so far.
    console.warn(`crawl: could not load ${url}: ${err.message}`);
    return;
  }
  await performLoginIfNeeded(page, url);

  const forms = await page.$$eval("form", forms =>
    forms.map(f => ({
      action: f.action,
      inputs: Array.from(f.querySelectorAll("input, textarea, select")).map(i => ({
        name: i.name,
        tag: i.tagName.toLowerCase(),
        type: i.type || null
      }))
    }))
  );
  const links = await page.$$eval("a[href]", as =>
    as.map(a => a.href).filter(h => h.startsWith("http"))
  );
  const ui = await getUIElements(page);
  const sections = await getSections(page);

  // Links are stored as found on the page (not normalized).
  selectors[url] = {
    forms,
    links: links.map(href => ({ to: href })),
    ui,
    sections
  };

  await discoverByClicking(page, url, depth);

  const normalizedLinks = links
    .map(h => normalizeUrl(h))
    .filter(h => h);
  for (const link of normalizedLinks) {
    await crawl(page, link, depth + 1);
  }
}
// Entry point: crawl from startURL in a visible browser and write the collected
// selectors to selectors.json.
(async () => {
  const browser = await chromium.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await crawl(page, startURL, 0);
    fs.writeFileSync("selectors.json", JSON.stringify(selectors, null, 2));
  } finally {
    // Close the browser even when the crawl fails, so the window and process do not linger.
    await browser.close();
  }
})();
// selectors-crawler.js
//@3
/*
Writes to : selectors.json
Consumed by : graph-utils.js
: test-generator.js
console.log("Loading: selectors-crawler.js");
import fs from "fs";
import { chromium } from "playwright";
import { normalizeUrl } from "./utils/normalizeUrl.js";
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
const {
startURL,
maxDepth,
loginConfig,
includePatterns = [],
excludePatterns = [],
matcherOptions = {}
} = config;
const visited = new Set();
const selectors = {};
const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);
const LOGOUT_KEYWORDS = ["logout", "signout", "logoff", "sign-out", "log-out"];
function isLogoutElement(info) {
const text = (info.text || "").toLowerCase();
const href = (info.href || "").toLowerCase();
const onclick = (info.onclick || "").toLowerCase();
return LOGOUT_KEYWORDS.some(k =>
text.includes(k) || href.includes(k) || onclick.includes(k)
);
}
async function getUIElements(page) {
return await page.$$eval(
`
button,
[onclick],
a[onclick]:not([href]),
img[onclick],
img[role='button'],
img[tabindex],
div[onclick],
span[onclick],
[role='button'],
[data-toggle],
[data-tab],
[data-accordion]
`,
els =>
els.map((el, index) => ({
id: `ui-${index}`,
tag: el.tagName.toLowerCase(),
text: el.innerText || "",
role: el.getAttribute("role") || null,
onclick: el.getAttribute("onclick"),
href: el.getAttribute("href"),
dataset: { ...el.dataset }
}))
);
}
async function getSections(page) {
return await page.$$eval(
`
section,
[role='dialog'],
[role='tabpanel'],
[role='tablist'],
.modal,
[data-modal],
[data-section],
[data-tab-panel],
[data-accordion-panel]
`,
els =>
els.map((el, index) => ({
id: el.id || `section-${index}`,
tag: el.tagName.toLowerCase(),
text: (el.innerText || "").slice(0, 200),
role: el.getAttribute("role") || null,
dataset: { ...el.dataset }
}))
);
}
async function scrollToReveal(page) {
await page.evaluate(async () => {
const total =
document.body.scrollHeight || document.documentElement.scrollHeight || 0;
const step = Math.max(200, Math.floor(total / 5));
for (let y = 0; y < total; y += step) {
window.scrollTo(0, y);
await new Promise(r => setTimeout(r, 150));
}
window.scrollTo(0, total);
});
}
async function discoverByClicking(page, url, depth) {
await scrollToReveal(page);
const clickables = await page.$$eval(
`
button,
[onclick],
a[onclick]:not([href]),
img[onclick],
img[role='button'],
img[tabindex],
div[onclick],
span[onclick],
[role='button'],
[data-toggle],
[data-tab],
[data-accordion]
`,
els =>
els.map(el => ({
text: el.innerText || "",
href: el.getAttribute("href"),
onclick: el.getAttribute("onclick"),
xpath: (() => {
let path = "";
let current = el;
while (current && current.nodeType === 1) {
let index = 1;
let sibling = current.previousElementSibling;
while (sibling) {
if (sibling.tagName === current.tagName) index++;
sibling = sibling.previousElementSibling;
}
path = `/${current.tagName}[${index}]` + path;
current = current.parentElement;
}
return path;
})()
}))
);
for (const info of clickables) {
if (isLogoutElement(info)) continue;
try {
const beforeLinks = new Set(
await page.$$eval("a[href]", as => as.map(a => a.href))
);
await page.evaluate(xpath => {
const getNode = xp => {
const result = document.evaluate(
xp,
document,
null,
XPathResult.FIRST_ORDERED_NODE_TYPE,
null
);
return result.singleNodeValue;
};
const el = getNode(xpath);
if (el) el.click();
}, info.xpath);
await page.waitForTimeout(400);
const afterLinks = await page.$$eval("a[href]", as =>
as.map(a => a.href)
);
for (const link of afterLinks) {
if (!beforeLinks.has(link)) {
const normalized = normalizeUrl(link);
if (normalized) await crawl(page, normalized, depth + 1);
}
}
} catch {}
}
}
async function performLoginIfNeeded(page, url) {
for (const rule of loginConfig.logins) {
if (url.includes(rule.match)) {
for (const [name, value] of Object.entries(rule.fields)) {
const sel = `input[name="${name}"]`;
await page.waitForSelector(sel);
await page.fill(sel, value);
}
const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
await page.waitForSelector(submitSel);
await page.click(submitSel);
await page.waitForLoadState("networkidle");
}
}
}
async function crawl(page, rawUrl, depth) {
const url = normalizeUrl(rawUrl);
if (depth > maxDepth) return;
if (!matcher.allow(url)) return;
if (visited.has(url)) return;
visited.add(url);
await page.goto(url, { waitUntil: "networkidle" });
await performLoginIfNeeded(page, url);
const forms = await page.$$eval("form", forms =>
forms.map(f => ({
action: f.action,
inputs: Array.from(f.querySelectorAll("input, textarea, select")).map(i => ({
name: i.name,
tag: i.tagName.toLowerCase(),
type: i.type || null
}))
}))
);
const links = await page.$$eval("a[href]", as =>
as.map(a => a.href).filter(h => h.startsWith("http"))
);
const ui = await getUIElements(page);
const sections = await getSections(page);
selectors[url] = {
forms,
links: links.map(href => ({ to: href })),
ui,
sections
};
await discoverByClicking(page, url, depth);
const normalizedLinks = links
.map(h => normalizeUrl(h))
.filter(h => h);
for (const link of normalizedLinks) {
await crawl(page, link, depth + 1);
}
}
(async () => {
const browser = await chromium.launch({ headless: false });
const page = await browser.newPage();
await crawl(page, startURL, 0);
fs.writeFileSync("selectors.json", JSON.stringify(selectors, null, 2));
await browser.close();
})();
*/

77
selectors.json Normal file
View File

@@ -0,0 +1,77 @@
{
"https://examples.eze2e.com/login/cookie/login.php": {
"forms": [],
"links": [
{
"to": "https://examples.eze2e.com/login/cookie/index.php#"
},
{
"to": "https://examples.eze2e.com/login/cookie/weather.php"
},
{
"to": "https://examples.eze2e.com/login/cookie/logout.php"
}
],
"ui": [],
"sections": []
},
"https://examples.eze2e.com/login/cookie/index.php": {
"forms": [],
"links": [
{
"to": "https://examples.eze2e.com/login/cookie/index.php#"
},
{
"to": "https://examples.eze2e.com/login/cookie/weather.php"
},
{
"to": "https://examples.eze2e.com/login/cookie/logout.php"
}
],
"ui": [],
"sections": []
},
"https://examples.eze2e.com/login/cookie/weather.php": {
"forms": [],
"links": [
{
"to": "https://examples.eze2e.com/login/cookie/index.php"
},
{
"to": "https://examples.eze2e.com/login/cookie/weather.php#"
},
{
"to": "https://examples.eze2e.com/login/cookie/logout.php"
}
],
"ui": [],
"sections": []
},
"https://examples.eze2e.com/login/cookie/logout.php": {
"forms": [
{
"action": "https://examples.eze2e.com/login/cookie/login.php",
"inputs": [
{
"name": "user",
"tag": "input",
"type": "text"
},
{
"name": "pass",
"tag": "input",
"type": "password"
},
{
"name": "login",
"tag": "input",
"type": "submit"
}
]
}
],
"links": [],
"ui": [],
"sections": []
}
}

22
status.json Normal file
View File

@@ -0,0 +1,22 @@
{
"https://examples.eze2e.com/login/cookie/login.php": {
"status": 200,
"finalUrl": "https://examples.eze2e.com/login/cookie/login.php",
"soft404": false
},
"https://examples.eze2e.com/login/cookie/index.php": {
"status": 200,
"finalUrl": "https://examples.eze2e.com/login/cookie/index.php",
"soft404": false
},
"https://examples.eze2e.com/login/cookie/weather.php": {
"status": 200,
"finalUrl": "https://examples.eze2e.com/login/cookie/weather.php",
"soft404": false
},
"https://examples.eze2e.com/login/cookie/logout.php": {
"status": 200,
"finalUrl": "https://examples.eze2e.com/login/cookie/login.php",
"soft404": false
}
}

50
test-executor.js Normal file
View File

@@ -0,0 +1,50 @@
//@5
/*
Consumes : `action-tests.json`
Writes to : [console]
*/
// test-executor.js
console.log("Loading: test-executor.js");
import fs from "fs";
import { chromium } from "playwright";
// Test cases to run, read once at startup from the "actionTests" array of action-tests.json.
const tests = JSON.parse(fs.readFileSync("./action-tests.json", "utf8")).actionTests;
/**
 * Strips a leading "crawl_" marker from a test value.
 *
 * @param {*} value value taken from a test case.
 * @returns {*} the string without its "crawl_" prefix; any other value
 *   (non-string, or string without the prefix) is returned unchanged.
 */
function normalizeValue(value) {
  const PREFIX = "crawl_";
  const isPrefixed = typeof value === "string" && value.startsWith(PREFIX);
  return isPrefixed ? value.slice(PREFIX.length) : value;
}
// Entry point: run every action test in one visible browser page and report
// each result on the console. A failing test is logged and the run continues.
(async () => {
  const browser = await chromium.launch({ headless: false });
  try {
    const page = await browser.newPage();
    for (const test of tests) {
      try {
        // Every test starts from a fresh load of its own URL.
        await page.goto(test.url, { waitUntil: "networkidle" });
        if (test.type === "input") {
          const value = normalizeValue(test.value);
          await page.waitForSelector(test.selector);
          await page.fill(test.selector, value);
        }
        if (test.type === "click") {
          await page.waitForSelector(test.selector);
          await page.click(test.selector);
        }
        console.log("✓", test.type.toUpperCase(), test.selector);
      } catch (err) {
        console.error("✗ Test failed:", test, err);
      }
    }
  } finally {
    // Close the browser even if something outside the per-test handler throws.
    await browser.close();
  }
})();

87
test-generator.js Normal file
View File

@@ -0,0 +1,87 @@
// test-generator.js
//@5
/*
Reads from : flows.json
Writes to : tests/
Short friendly labels enabled
*/
console.log("Loading: test-generator.js");
import fs from "fs";
// Read the crawled flows; each flow is an ordered list of URLs.
const data = JSON.parse(fs.readFileSync("./flows.json", "utf8"));
const flows = data.flows || [];

// Create the output directory if it does not exist yet.
fs.mkdirSync("tests", { recursive: true });

// Emit one Playwright spec per flow: tests/test-1.spec.js, tests/test-2.spec.js, …
let counter = 1;
for (const flow of flows) {
  const testName = `test-${counter}.spec.js`;
  // JSON.stringify yields a correctly escaped double-quoted literal, so a URL
  // containing quotes or backslashes cannot break the generated file.
  const steps = flow
    .map(url => ` await page.goto(${JSON.stringify(url)}, { waitUntil: "networkidle" });`)
    .join("\n");
  const content = `
import { test } from "@playwright/test";
test("Flow ${counter}", async ({ page }) => {
${steps}
});
`.trimStart();
  fs.writeFileSync(`tests/${testName}`, content);
  counter++;
}
console.log("Generated tests/");
// test-generator.js
//@5
/*
Reads from : flows.json
Writes to : tests/
console.log("Loading: test-generator.js");
import fs from "fs";
const data = JSON.parse(fs.readFileSync("./flows.json", "utf8"));
const flows = data.flows || [];
if (!fs.existsSync("tests")) fs.mkdirSync("tests");
let counter = 1;
for (const flow of flows) {
const testName = `test-${counter}.spec.js`;
const steps = flow
.map((url, i) => {
if (i === 0) {
return ` await page.goto("${url}", { waitUntil: "networkidle" });`;
}
return ` await page.goto("${url}", { waitUntil: "networkidle" });`;
})
.join("\n");
const content = `
import { test } from "@playwright/test";
test("Flow ${counter}", async ({ page }) => {
${steps}
});
`.trimStart();
fs.writeFileSync(`tests/${testName}`, content);
counter++;
}
console.log("Generated tests/");
*/

5
tests/test-1.spec.js Normal file
View File

@@ -0,0 +1,5 @@
import { test } from "@playwright/test";
// Flow 1 — has the shape emitted by test-generator.js: loads the flow's single
// URL and waits for network idle. It makes no assertions.
test("Flow 1", async ({ page }) => {
await page.goto("https://examples.eze2e.com/login/cookie/login.php", { waitUntil: "networkidle" });
});

6
tests/test-2.spec.js Normal file
View File

@@ -0,0 +1,6 @@
import { test } from "@playwright/test";
// Flow 2 — has the shape emitted by test-generator.js: visits login.php then
// index.php, waiting for network idle after each. It makes no assertions.
test("Flow 2", async ({ page }) => {
await page.goto("https://examples.eze2e.com/login/cookie/login.php", { waitUntil: "networkidle" });
await page.goto("https://examples.eze2e.com/login/cookie/index.php", { waitUntil: "networkidle" });
});

7
tests/test-3.spec.js Normal file
View File

@@ -0,0 +1,7 @@
import { test } from "@playwright/test";
// Flow 3 — has the shape emitted by test-generator.js: visits login.php,
// index.php and weather.php in order, waiting for network idle after each.
// It makes no assertions.
test("Flow 3", async ({ page }) => {
await page.goto("https://examples.eze2e.com/login/cookie/login.php", { waitUntil: "networkidle" });
await page.goto("https://examples.eze2e.com/login/cookie/index.php", { waitUntil: "networkidle" });
await page.goto("https://examples.eze2e.com/login/cookie/weather.php", { waitUntil: "networkidle" });
});

8
tests/test-4.spec.js Normal file
View File

@@ -0,0 +1,8 @@
import { test } from "@playwright/test";
// Flow 4 — has the shape emitted by test-generator.js: visits login.php,
// index.php, weather.php and logout.php in order, waiting for network idle
// after each. It makes no assertions.
test("Flow 4", async ({ page }) => {
await page.goto("https://examples.eze2e.com/login/cookie/login.php", { waitUntil: "networkidle" });
await page.goto("https://examples.eze2e.com/login/cookie/index.php", { waitUntil: "networkidle" });
await page.goto("https://examples.eze2e.com/login/cookie/weather.php", { waitUntil: "networkidle" });
await page.goto("https://examples.eze2e.com/login/cookie/logout.php", { waitUntil: "networkidle" });
});

51
utils/normalizeUrl.js Normal file
View File

@@ -0,0 +1,51 @@
// utils/normalizeUrl.js
/**
 * Canonicalizes an absolute URL so that equivalent addresses compare equal.
 *
 * The input is trimmed, its fragment is removed, scheme and host are
 * lower-cased, and a trailing slash is dropped from any path other than "/".
 *
 * @param {string} raw URL as found on a page.
 * @returns {string|null} the canonical URL, or null when the input is empty,
 *   uses a javascript:, mailto:, tel: or data: scheme, or cannot be parsed as
 *   an absolute URL.
 */
export function normalizeUrl(raw) {
  if (!raw) return null;
  try {
    const trimmed = raw.trim();
    const isNonNavigable = /^(javascript:|mailto:|tel:|data:)/i.test(trimmed);
    if (isNonNavigable) return null;

    // Keep only what precedes the first "#".
    const hashAt = trimmed.indexOf("#");
    const withoutFragment = hashAt === -1 ? trimmed : trimmed.slice(0, hashAt);

    const parsed = new URL(withoutFragment);
    parsed.protocol = parsed.protocol.toLowerCase();
    parsed.hostname = parsed.hostname.toLowerCase();

    const path = parsed.pathname;
    if (path !== "/" && path.endsWith("/")) {
      parsed.pathname = path.slice(0, -1);
    }
    return parsed.toString();
  } catch {
    // Unparseable or non-string input.
    return null;
  }
}
/*
// utils/normalizeUrl.js
console.log("Loading: utils/normalizeUrl.js");
export function normalizeUrl(rawUrl) {
try {
const url = new URL(rawUrl);
url.hash = "";
url.hostname = url.hostname.toLowerCase();
if (url.pathname !== "/" && url.pathname.endsWith("/")) {
url.pathname = url.pathname.slice(0, -1);
}
if (url.searchParams && [...url.searchParams].length > 1) {
const sorted = [...url.searchParams.entries()].sort();
url.search = "";
for (const [k, v] of sorted) url.searchParams.append(k, v);
}
return url.toString();
} catch {
return rawUrl;
}
}
*/

136
utils/urlPatternMatcher.js Normal file
View File

@@ -0,0 +1,136 @@
// utils/urlPatternMatcher.js
/**
 * Decides whether a URL may be crawled, based on include and exclude patterns.
 *
 * A pattern that starts with "^" is used as a regular expression; any other
 * pattern is a plain substring test. Exclusions win over inclusions, and a URL
 * must match at least one include pattern — an empty include list therefore
 * rejects every URL.
 */
export class UrlPatternMatcher {
  /**
   * @param {string[]} [includePatterns] patterns a URL must match (at least one).
   * @param {string[]} [excludePatterns] patterns that reject a URL outright.
   * @param {{debug?: boolean, logging?: boolean}} [options] set either flag to
   *   trace every decision on the console.
   */
  constructor(includePatterns = [], excludePatterns = [], options = {}) {
    this.include = includePatterns;
    this.exclude = excludePatterns;
    // login-config.json enables tracing via `matcherOptions.logging`; `debug`
    // is still accepted so existing callers keep working.
    this.debug = options.debug || options.logging || false;
  }

  /** Writes a trace line when tracing is enabled. */
  log(...args) {
    if (this.debug) console.log("[UrlPatternMatcher]", ...args);
  }

  /**
   * @param {string} url URL to test.
   * @returns {boolean} true when no exclude pattern matches and at least one
   *   include pattern does.
   */
  allow(url) {
    for (const pat of this.exclude) {
      if (this.match(url, pat)) {
        this.log("EXCLUDE:", pat, "→", url);
        return false;
      }
    }

    let included = false;
    for (const pat of this.include) {
      if (this.match(url, pat)) {
        included = true;
        break;
      }
    }
    if (!included) {
      this.log("NO INCLUDE:", url);
      return false;
    }

    this.log("ALLOW:", url);
    return true;
  }

  /**
   * Tests one URL against one pattern.
   *
   * @param {string} url     URL to test.
   * @param {string} pattern regex source when it starts with "^", otherwise a substring.
   * @returns {boolean} whether the pattern matches; empty patterns and invalid
   *   regular expressions never match.
   */
  match(url, pattern) {
    if (!pattern) return false;

    if (pattern.startsWith("^")) {
      try {
        const re = new RegExp(pattern);
        const ok = re.test(url);
        this.log("REGEX:", pattern, "→", ok);
        return ok;
      } catch {
        // Invalid regular expression: treat as "no match".
        return false;
      }
    }

    const ok = url.includes(pattern);
    this.log("SUBSTR:", `"${pattern}" in "${url}" →`, ok);
    return ok;
  }
}
/*
console.log("Loading: utils/urlPatternMatcher.js");
export class UrlPatternMatcher {
constructor(includePatterns = [], excludePatterns = [], options = {}) {
this.includePatterns = includePatterns;
this.excludePatterns = excludePatterns;
this.options = {
logging: false,
...options
};
}
log(...args) {
if (this.options.logging) {
console.log("[UrlPatternMatcher]", ...args);
}
}
isRegex(pattern) {
return (
pattern.startsWith("^") ||
pattern.endsWith("$") ||
pattern.includes(".*") ||
pattern.includes("\\")
);
}
match(url, pattern) {
if (this.isRegex(pattern)) {
const re = new RegExp(pattern);
const ok = re.test(url);
this.log(`Regex test: ${pattern} → ${ok}`);
return ok;
}
const ok = url.includes(pattern);
this.log(`Substring test: "${pattern}" in "${url}" → ${ok}`);
return ok;
}
matchesAny(url, patterns) {
return patterns.some(p => this.match(url, p));
}
shouldExclude(url) {
const excluded = this.matchesAny(url, this.excludePatterns);
if (excluded) {
this.log(`EXCLUDED: ${url}`);
}
return excluded;
}
shouldInclude(url) {
if (!this.includePatterns.length) {
this.log(`Included by default (no includePatterns): ${url}`);
return true;
}
const included = this.matchesAny(url, this.includePatterns);
this.log(`Include check for ${url} → ${included}`);
return included;
}
allow(url) {
if (this.shouldExclude(url)) {
this.log(`Final decision: BLOCKED → ${url}`);
return false;
}
const allowed = this.shouldInclude(url);
this.log(`Final decision: ${allowed ? "ALLOW" : "BLOCK"} → ${url}`);
return allowed;
}
}
*/