Uploaded Entire Project
This commit is contained in:
3
action-tests.json
Normal file
3
action-tests.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"actionTests": []
|
||||
}
|
||||
66
cli.js
Normal file
66
cli.js
Normal file
@@ -0,0 +1,66 @@
|
||||
//@1
|
||||
console.log("Loading: cli.js");
|
||||
|
||||
/*
|
||||
Consumes : `login-config.json`, e.g.
|
||||
e.g. "startURL": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"domain" : "https://examples.eze2e.com",
|
||||
*/
|
||||
|
||||
/* Command Line Commands..
|
||||
Step 1:
|
||||
node cli.js
|
||||
|
||||
Step 2:
|
||||
node generate-graph-data.js
|
||||
>Loading: graph-utils.js
|
||||
>graph-data.js written
|
||||
|
||||
Step 3:
|
||||
open graph.html
|
||||
*/
|
||||
|
||||
import fs from "fs";
import { execFileSync, execSync } from "child_process";
|
||||
/*
|
||||
`Runs` these..
|
||||
"crawler-flow.js" , "Step 1: Crawling flows"
|
||||
"selectors-crawler.js" , "Step 2: Crawling selectors"
|
||||
"test-generator.js" , "Step 3: Generating tests"
|
||||
"test-executor.js" , "Step 4: Running tests"
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Reads a millisecond timeout from an environment variable.
 *
 * @param {string} name - Environment variable to read.
 * @param {number} fallback - Value used when the variable is unset or unusable.
 * @returns {number} The parsed value when it is a positive finite number,
 *   otherwise `fallback`.
 */
function readTimeout(name, fallback) {
  const parsed = Number(process.env[name]);
  // Unset, empty, non-numeric, zero, negative and infinite values all fall
  // back: none of them is a meaningful timeout to hand to a child process.
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

const ACTION_TIMEOUT = readTimeout("PW_ACTION_TIMEOUT", 5000);
const NAV_TIMEOUT = readTimeout("PW_NAV_TIMEOUT", 5000);

console.log(`Using timeouts: ACTION_TIMEOUT=${ACTION_TIMEOUT}ms, NAV_TIMEOUT=${NAV_TIMEOUT}ms`);
|
||||
|
||||
/**
 * Runs one pipeline stage as a child Node.js process and blocks until it exits.
 *
 * The child inherits this process's stdio and environment, with the two
 * timeout settings added as PW_ACTION_TIMEOUT / PW_NAV_TIMEOUT.
 *
 * @param {string} script - Path of the script to execute.
 * @param {string} label - Human-readable stage name printed before the run.
 * @throws {Error} When the child process cannot be started or exits non-zero.
 */
function run(script, label) {
  console.log(`\n=== ${label} (${script}) ===`);

  // execFileSync passes `script` as a single argv entry, so paths containing
  // spaces or shell metacharacters are not re-interpreted by a shell, and
  // process.execPath reuses the Node binary that is running this CLI.
  execFileSync(process.execPath, [script], {
    stdio: "inherit",
    env: {
      ...process.env,
      // Environment values are strings; convert explicitly.
      PW_ACTION_TIMEOUT: String(ACTION_TIMEOUT),
      PW_NAV_TIMEOUT: String(NAV_TIMEOUT),
    },
  });
}
|
||||
|
||||
// Positional CLI arguments. They are read here but nothing below uses them.
const [, , mode, outFile] = process.argv;

// Pipeline stages, executed strictly in this order: [script, label].
const PIPELINE_STEPS = [
  ["crawler-flow.js", "Step 1: Crawling flows"],
  ["selectors-crawler.js", "Step 2: Crawling selectors"],
  ["test-generator.js", "Step 3: Generating tests"],
  ["test-executor.js", "Step 4: Running tests"],
];

console.log("\n=== Starting full pipeline ===");

for (const [script, label] of PIPELINE_STEPS) {
  run(script, label);
}

// Manual follow-up commands for building and viewing the graph.
[
  "NEXT STEPS..",
  "node generate-graph-data.js (to generate graph data)",
  "open graph.html (to view graph , can view in browser from file explorer too)",
].forEach(line => console.log(line));

console.log("\n=== Pipeline complete ===");
|
||||
|
||||
476
crawler-flow.js
Normal file
476
crawler-flow.js
Normal file
@@ -0,0 +1,476 @@
|
||||
// crawler-flow.js
|
||||
//@2
|
||||
/*
|
||||
Writes to : flows.json
|
||||
: status.json
|
||||
Consumed by : graph-utils.js
|
||||
Short friendly labels enabled
|
||||
*/
|
||||
|
||||
console.log("Loading: crawler-flow.js");
|
||||
|
||||
import fs from "fs";
|
||||
import { chromium } from "playwright";
|
||||
import { normalizeUrl } from "./utils/normalizeUrl.js";
|
||||
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
|
||||
|
||||
// Crawl settings. login-config.json is required; a missing or malformed file
// aborts the script immediately.
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
const {
  startURL,
  maxDepth,
  loginConfig,
  includePatterns = [],
  excludePatterns = [],
  matcherOptions = {},
} = config;

/**
 * Reads and parses a JSON file that is allowed to be absent.
 *
 * @param {string} file - Path of the JSON file.
 * @param {*} fallback - Value returned when the file does not exist.
 * @returns {*} Parsed content, or `fallback` when the file is missing.
 * @throws {Error} For any failure other than a missing file (e.g. invalid JSON).
 */
function readOptionalJson(file, fallback) {
  try {
    return JSON.parse(fs.readFileSync(file, "utf8"));
  } catch (err) {
    if (err.code === "ENOENT") return fallback;
    throw err;
  }
}

// selectors.json only contributes extra crawl seeds (see crawl()). cli.js runs
// this script as Step 1, before the selectors step, so the file may not exist
// yet on a first run - presumably it is produced by selectors-crawler.js
// (TODO confirm). Treat a missing file as "no extra seeds" instead of crashing.
const selectors = readOptionalJson("./selectors.json", {});

// Crawl state shared by the functions below.
const visited = new Set(); // normalized URLs already navigated to
const flows = []; // one URL path (root -> page) per visited page
const statusMap = {}; // url -> { status, finalUrl, soft404 }

const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);

// Substrings that mark an element as a logout control (see isLogoutElement).
const LOGOUT_KEYWORDS = ["logout", "signout", "logoff", "sign-out", "log-out"];
|
||||
|
||||
/**
 * Tells whether a clickable element looks like a logout control, judging by
 * its visible text, `href` and `onclick` attribute.
 *
 * Matching is case-insensitive and ignores whitespace, hyphens and underscores
 * on both sides, so "Log out", "Sign-Out" and "log_off" are recognised as well
 * as the literal keywords. This is deliberately over-eager: wrongly skipping a
 * click costs a little coverage, while clicking a logout control ends the
 * session for the rest of the crawl.
 *
 * @param {{text?: string|null, href?: string|null, onclick?: string|null}} info
 * @returns {boolean} True when any field contains any LOGOUT_KEYWORDS entry.
 */
function isLogoutElement(info) {
  const squash = value =>
    String(value ?? "")
      .toLowerCase()
      .replace(/[\s_-]+/g, "");

  const haystacks = [info.text, info.href, info.onclick].map(squash);

  return LOGOUT_KEYWORDS.some(keyword => {
    const needle = squash(keyword);
    // An empty needle would match everything; ignore such keywords.
    return needle !== "" && haystacks.some(field => field.includes(needle));
  });
}
|
||||
|
||||
/**
 * Builds a short, slug-style label for a UI element.
 *
 * Candidates are tried in this order and the first one that yields a non-empty
 * slug wins: visible text, `data-tab`, `data-modal`, `data-accordion`
 * (each prefixed with its kind), the ARIA role, and finally `<tag>-<index>`.
 *
 * @param {string} text - Visible text of the element.
 * @param {string|null} role - Value of the `role` attribute.
 * @param {Object|null|undefined} dataset - The element's data-* attributes.
 * @param {string} tag - Lower-case tag name, used for the fallback label.
 * @param {number} index - Position of the element, used for the fallback label.
 * @returns {string} The label.
 */
function friendlyLabel(text, role, dataset, tag, index) {
  // Trim, turn whitespace runs into "-", drop everything except
  // letters, digits, "_" and "-", then lower-case.
  const slugify = value =>
    String(value || "")
      .trim()
      .replace(/\s+/g, "-")
      .replace(/[^a-zA-Z0-9_-]/g, "")
      .toLowerCase();

  const textSlug = slugify(text);
  if (textSlug) return textSlug;

  for (const kind of ["tab", "modal", "accordion"]) {
    const slug = slugify(dataset?.[kind]);
    // Skip values that sanitise to nothing; they used to yield a dangling
    // label such as "tab-".
    if (slug) return `${kind}-${slug}`;
  }

  const roleSlug = slugify(role);
  if (roleSlug) return roleSlug;

  return `${tag}-${index}`;
}
|
||||
|
||||
/**
 * Collects the elements on the current page that look clickable without being
 * ordinary links: buttons, elements with an `onclick` attribute, elements with
 * role="button", images with a tabindex, and elements carrying data-toggle /
 * data-tab / data-accordion.
 *
 * @param {object} page - Playwright page; only `$$eval` is used here.
 * @returns {Promise<Array<{text: string, href: string|null, onclick: string|null,
 *   tag: string, role: string|null, dataset: Object, index: number,
 *   friendly: string, xpath: string}>>} One descriptor per matched element.
 *   `xpath` is an absolute positional path suitable for `document.evaluate`.
 */
async function getClickableElements(page) {
  const clickables = await page.$$eval(
    `
button,
[onclick],
a[onclick]:not([href]),
img[onclick],
img[role='button'],
img[tabindex],
div[onclick],
span[onclick],
[role='button'],
[data-toggle],
[data-tab],
[data-accordion]
`,
    els => {
      // This callback is handed to $$eval, so it is kept self-contained and
      // does not reference anything from the enclosing module.
      const xpathFor = el => {
        let path = "";
        for (let node = el; node && node.nodeType === 1; node = node.parentElement) {
          // 1-based position among preceding siblings with the same local name.
          let position = 1;
          for (let sib = node.previousElementSibling; sib; sib = sib.previousElementSibling) {
            if (sib.localName === node.localName) position++;
          }
          // local-name() matches regardless of namespace or tag-name casing,
          // so inline SVG and similar elements stay addressable; a plain
          // upper-case tagName step does not select them.
          path = `/*[local-name()='${node.localName}'][${position}]` + path;
        }
        return path;
      };

      return els.map((el, index) => ({
        text: el.innerText || "",
        href: el.getAttribute("href"),
        onclick: el.getAttribute("onclick"),
        tag: el.tagName.toLowerCase(),
        role: el.getAttribute("role"),
        dataset: { ...el.dataset },
        index,
        friendly: null, // filled in below
        xpath: xpathFor(el),
      }));
    }
  );

  // friendlyLabel is defined in this module, so the labels are computed here
  // rather than inside the $$eval callback.
  for (const el of clickables) {
    el.friendly = friendlyLabel(el.text, el.role, el.dataset, el.tag, el.index);
  }

  return clickables;
}
|
||||
|
||||
/**
 * Scrolls the page from top to bottom in a few pauses so that content which
 * only appears on scroll has a chance to render.
 *
 * @param {object} page - Playwright page; only `evaluate` is used.
 * @returns {Promise<void>}
 */
async function scrollToReveal(page) {
  await page.evaluate(async () => {
    const pause = ms => new Promise(resolve => setTimeout(resolve, ms));

    const pageHeight =
      document.body.scrollHeight || document.documentElement.scrollHeight || 0;
    // Roughly five stops per page, but never steps smaller than 200px.
    const stride = Math.max(200, Math.floor(pageHeight / 5));

    let offset = 0;
    while (offset < pageHeight) {
      window.scrollTo(0, offset);
      await pause(150);
      offset += stride;
    }

    // Finish exactly at the bottom.
    window.scrollTo(0, pageHeight);
  });
}
|
||||
|
||||
/**
 * Clicks every non-logout clickable element on the current page and crawls
 * any link (`a[href]`) that was not present before the click.
 *
 * After each element the browser is brought back to the page under
 * inspection if the click or a nested crawl navigated elsewhere; otherwise
 * the remaining XPaths would be evaluated against an unrelated document.
 * Failures on a single element are logged and do not stop the others.
 *
 * @param {object} page - Playwright page.
 * @param {string} url - URL being crawled (used for log messages).
 * @param {number} depth - Depth of `url`; discovered links are crawled at depth + 1.
 * @param {string[]} path - URL path leading to and including `url`.
 * @returns {Promise<void>}
 */
async function discoverByClicking(page, url, depth, path) {
  const withoutHash = value => String(value).split("#")[0];

  await scrollToReveal(page);

  // Where the browser actually is. crawl() calls performLoginIfNeeded before
  // this function, so the page may already have moved on from `url`.
  const originUrl = page.url();

  const clickables = await getClickableElements(page);

  for (const info of clickables) {
    if (isLogoutElement(info)) continue;

    try {
      const beforeLinks = new Set(
        await page.$$eval("a[href]", as => as.map(a => a.href))
      );

      await page.evaluate(xpath => {
        const found = document.evaluate(
          xpath,
          document,
          null,
          XPathResult.FIRST_ORDERED_NODE_TYPE,
          null
        ).singleNodeValue;
        if (found) found.click();
      }, info.xpath);

      // Give click handlers a moment to update the DOM.
      await page.waitForTimeout(400);

      const afterLinks = await page.$$eval("a[href]", as => as.map(a => a.href));

      for (const link of afterLinks) {
        if (beforeLinks.has(link)) continue;
        const normalized = normalizeUrl(link);
        if (normalized) {
          await crawl(page, normalized, depth + 1, path);
        }
      }
    } catch (err) {
      // Best effort: one broken element must not abort the whole page.
      console.warn(
        `discoverByClicking: skipped "${info.friendly ?? info.xpath}" on ${url}: ${err.message}`
      );
    }

    // Fragment-only changes stay on the same document and need no reload.
    if (withoutHash(page.url()) !== withoutHash(originUrl)) {
      try {
        await page.goto(originUrl, { waitUntil: "networkidle" });
      } catch (err) {
        console.warn(`discoverByClicking: could not return to ${originUrl}: ${err.message}`);
        break; // remaining elements cannot be located without their page
      }
    }
  }
}
|
||||
|
||||
/**
 * Fills in and submits a login form when `url` matches a configured login rule.
 *
 * Rules come from `loginConfig.logins`; each rule has a `match` substring, a
 * `fields` map (input name -> value) and the `name` of the submit control.
 * Only the first matching rule is applied: once the form has been submitted
 * the page has moved on, and waiting for another rule's inputs would only
 * time out.
 *
 * @param {object} page - Playwright page positioned on `url`.
 * @param {string} url - URL that was just navigated to.
 * @returns {Promise<void>}
 */
async function performLoginIfNeeded(page, url) {
  // No login section in the config means there is nothing to do.
  const rules = loginConfig?.logins ?? [];
  const rule = rules.find(candidate => candidate.match && url.includes(candidate.match));
  if (!rule) return;

  for (const [name, value] of Object.entries(rule.fields ?? {})) {
    const sel = `input[name="${name}"]`;
    await page.waitForSelector(sel);
    // Config values may be numbers (e.g. a PIN); fill expects a string.
    await page.fill(sel, String(value));
  }

  const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
  await page.waitForSelector(submitSel);
  await page.click(submitSel);

  await page.waitForLoadState("networkidle");
}
|
||||
|
||||
/**
 * Visits `rawUrl` (once) and recursively crawls everything reachable from it:
 * links revealed by clicking, plain `a[href]` links, and the link/form targets
 * listed for this URL in `selectors`.
 *
 * Side effects: adds to `visited`, appends the path to `flows`, and records
 * `{ status, finalUrl, soft404 }` in `statusMap`. A navigation that throws is
 * recorded with status 0 and does not abort the rest of the crawl.
 *
 * @param {object} page - Playwright page reused for every navigation.
 * @param {string} rawUrl - URL to visit; normalized before use.
 * @param {number} depth - Current depth; URLs deeper than `maxDepth` are skipped.
 * @param {string[]} path - URLs that led here (not including this one).
 * @returns {Promise<void>}
 */
async function crawl(page, rawUrl, depth, path) {
  const withoutHash = value => String(value).split("#")[0];

  const url = normalizeUrl(rawUrl);

  if (!url) return; // nothing usable to navigate to
  if (depth > maxDepth) return;
  if (!matcher.allow(url)) return;
  if (visited.has(url)) return;

  visited.add(url);

  let response;
  try {
    response = await page.goto(url, { waitUntil: "networkidle" });
  } catch (err) {
    // Timeouts, DNS errors, refused connections...: note the failure and move on.
    console.warn(`crawl: navigation to ${url} failed: ${err.message}`);
    statusMap[url] = { status: 0, finalUrl: url, soft404: false };
    return;
  }

  const status = response?.status() || 0;
  const finalUrl = response?.url() || url;
  const title = await page.title();
  // Heuristic: a page whose title mentions 404 is treated as "not found"
  // even if it was served with a success status.
  const soft404 = title.includes("404");

  statusMap[url] = { status, finalUrl, soft404 };

  await performLoginIfNeeded(page, url);

  const newPath = [...path, url];
  flows.push(newPath);

  // Click-discovery crawls recursively and can leave the browser on another
  // page; remember where we are so links are harvested from the right one.
  const landedUrl = page.url();

  await discoverByClicking(page, url, depth, newPath);

  let links = [];
  let onInspectedPage = true;
  if (withoutHash(page.url()) !== withoutHash(landedUrl)) {
    try {
      await page.goto(landedUrl, { waitUntil: "networkidle" });
    } catch (err) {
      onInspectedPage = false;
      console.warn(`crawl: could not return to ${landedUrl}: ${err.message}`);
    }
  }
  if (onInspectedPage) {
    links = await page.$$eval("a[href]", as =>
      as.map(a => a.href).filter(h => h.startsWith("http"))
    );
  }

  const normalizedLinks = links.map(h => normalizeUrl(h)).filter(h => h);

  for (const link of normalizedLinks) {
    await crawl(page, link, depth + 1, newPath);
  }

  // Extra seeds recorded for this URL in selectors.json, if any.
  const entry = selectors[url];
  if (entry) {
    for (const link of entry.links || []) {
      const target = normalizeUrl(link.to);
      if (target) await crawl(page, target, depth + 1, newPath);
    }

    for (const form of entry.forms || []) {
      const target = normalizeUrl(form.action);
      if (target) await crawl(page, target, depth + 1, newPath);
    }
  }
}
|
||||
|
||||
// Entry point: crawl from startURL, then write flows.json and status.json.
(async () => {
  const browser = await chromium.launch({ headless: false });

  try {
    const page = await browser.newPage();

    await crawl(page, startURL, 0, []);

    fs.writeFileSync("flows.json", JSON.stringify({ flows }, null, 2));
    fs.writeFileSync("status.json", JSON.stringify(statusMap, null, 2));
  } finally {
    // Always release the browser, including when the crawl throws.
    await browser.close();
  }
})().catch(err => {
  // Report the failure and exit non-zero so that cli.js stops the pipeline.
  console.error("crawler-flow.js failed:", err);
  process.exitCode = 1;
});
|
||||
|
||||
|
||||
// crawler-flow.js
|
||||
//@2
|
||||
/*
|
||||
Writes to : flows.json
|
||||
: status.json
|
||||
Consumed by : graph-utils.js
|
||||
console.log("Loading: crawler-flow.js");
|
||||
|
||||
import fs from "fs";
|
||||
import { chromium } from "playwright";
|
||||
import { normalizeUrl } from "./utils/normalizeUrl.js";
|
||||
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
|
||||
|
||||
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
|
||||
const {
|
||||
startURL,
|
||||
maxDepth,
|
||||
loginConfig,
|
||||
includePatterns = [],
|
||||
excludePatterns = [],
|
||||
matcherOptions = {}
|
||||
} = config;
|
||||
|
||||
const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));
|
||||
|
||||
const visited = new Set();
|
||||
const flows = [];
|
||||
const statusMap = {};
|
||||
|
||||
const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);
|
||||
|
||||
const LOGOUT_KEYWORDS = ["logout", "signout", "logoff", "sign-out", "log-out"];
|
||||
|
||||
function isLogoutElement(info) {
|
||||
const text = (info.text || "").toLowerCase();
|
||||
const href = (info.href || "").toLowerCase();
|
||||
const onclick = (info.onclick || "").toLowerCase();
|
||||
return LOGOUT_KEYWORDS.some(k =>
|
||||
text.includes(k) || href.includes(k) || onclick.includes(k)
|
||||
);
|
||||
}
|
||||
|
||||
async function getClickableElements(page) {
|
||||
return await page.$$eval(
|
||||
`
|
||||
button,
|
||||
[onclick],
|
||||
a[onclick]:not([href]),
|
||||
img[onclick],
|
||||
img[role='button'],
|
||||
img[tabindex],
|
||||
div[onclick],
|
||||
span[onclick],
|
||||
[role='button'],
|
||||
[data-toggle],
|
||||
[data-tab],
|
||||
[data-accordion]
|
||||
`,
|
||||
els =>
|
||||
els.map(el => ({
|
||||
text: el.innerText || "",
|
||||
href: el.getAttribute("href"),
|
||||
onclick: el.getAttribute("onclick"),
|
||||
xpath: (() => {
|
||||
let path = "";
|
||||
let current = el;
|
||||
while (current && current.nodeType === 1) {
|
||||
let index = 1;
|
||||
let sibling = current.previousElementSibling;
|
||||
while (sibling) {
|
||||
if (sibling.tagName === current.tagName) index++;
|
||||
sibling = sibling.previousElementSibling;
|
||||
}
|
||||
path = `/${current.tagName}[${index}]` + path;
|
||||
current = current.parentElement;
|
||||
}
|
||||
return path;
|
||||
})()
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
async function scrollToReveal(page) {
|
||||
await page.evaluate(async () => {
|
||||
const total =
|
||||
document.body.scrollHeight || document.documentElement.scrollHeight || 0;
|
||||
const step = Math.max(200, Math.floor(total / 5));
|
||||
for (let y = 0; y < total; y += step) {
|
||||
window.scrollTo(0, y);
|
||||
await new Promise(r => setTimeout(r, 150));
|
||||
}
|
||||
window.scrollTo(0, total);
|
||||
});
|
||||
}
|
||||
|
||||
async function discoverByClicking(page, url, depth, path) {
|
||||
await scrollToReveal(page);
|
||||
|
||||
const clickables = await getClickableElements(page);
|
||||
|
||||
for (const info of clickables) {
|
||||
if (isLogoutElement(info)) continue;
|
||||
|
||||
try {
|
||||
const beforeLinks = new Set(
|
||||
await page.$$eval("a[href]", as => as.map(a => a.href))
|
||||
);
|
||||
|
||||
await page.evaluate(xpath => {
|
||||
const getNode = xp => {
|
||||
const result = document.evaluate(
|
||||
xp,
|
||||
document,
|
||||
null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE,
|
||||
null
|
||||
);
|
||||
return result.singleNodeValue;
|
||||
};
|
||||
const el = getNode(xpath);
|
||||
if (el) el.click();
|
||||
}, info.xpath);
|
||||
|
||||
await page.waitForTimeout(400);
|
||||
|
||||
const afterLinks = await page.$$eval("a[href]", as =>
|
||||
as.map(a => a.href)
|
||||
);
|
||||
|
||||
for (const link of afterLinks) {
|
||||
if (!beforeLinks.has(link)) {
|
||||
const normalized = normalizeUrl(link);
|
||||
if (normalized) {
|
||||
await crawl(page, normalized, depth + 1, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
|
||||
async function performLoginIfNeeded(page, url) {
|
||||
for (const rule of loginConfig.logins) {
|
||||
if (url.includes(rule.match)) {
|
||||
for (const [name, value] of Object.entries(rule.fields)) {
|
||||
const sel = `input[name="${name}"]`;
|
||||
await page.waitForSelector(sel);
|
||||
await page.fill(sel, value);
|
||||
}
|
||||
|
||||
const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
|
||||
await page.waitForSelector(submitSel);
|
||||
await page.click(submitSel);
|
||||
|
||||
await page.waitForLoadState("networkidle");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function crawl(page, rawUrl, depth, path) {
|
||||
const url = normalizeUrl(rawUrl);
|
||||
|
||||
if (depth > maxDepth) return;
|
||||
if (!matcher.allow(url)) return;
|
||||
if (visited.has(url)) return;
|
||||
|
||||
visited.add(url);
|
||||
|
||||
const response = await page.goto(url, { waitUntil: "networkidle" });
|
||||
const status = response?.status() || 0;
|
||||
const finalUrl = response?.url() || url;
|
||||
const title = await page.title();
|
||||
const soft404 = title.includes("404");
|
||||
|
||||
statusMap[url] = { status, finalUrl, soft404 };
|
||||
|
||||
await performLoginIfNeeded(page, url);
|
||||
|
||||
const newPath = [...path, url];
|
||||
flows.push(newPath);
|
||||
|
||||
await discoverByClicking(page, url, depth, newPath);
|
||||
|
||||
const links = await page.$$eval("a[href]", as =>
|
||||
as.map(a => a.href).filter(h => h.startsWith("http"))
|
||||
);
|
||||
|
||||
const normalizedLinks = links
|
||||
.map(h => normalizeUrl(h))
|
||||
.filter(h => h);
|
||||
|
||||
for (const link of normalizedLinks) {
|
||||
await crawl(page, link, depth + 1, newPath);
|
||||
}
|
||||
|
||||
if (selectors[url]) {
|
||||
const entry = selectors[url];
|
||||
|
||||
for (const link of entry.links || []) {
|
||||
const target = normalizeUrl(link.to);
|
||||
if (target) await crawl(page, target, depth + 1, newPath);
|
||||
}
|
||||
|
||||
for (const form of entry.forms || []) {
|
||||
const target = normalizeUrl(form.action);
|
||||
if (target) await crawl(page, target, depth + 1, newPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: false });
|
||||
const page = await browser.newPage();
|
||||
|
||||
await crawl(page, startURL, 0, []);
|
||||
|
||||
fs.writeFileSync("flows.json", JSON.stringify({ flows }, null, 2));
|
||||
fs.writeFileSync("status.json", JSON.stringify(statusMap, null, 2));
|
||||
|
||||
await browser.close();
|
||||
})();
|
||||
|
||||
*/
|
||||
22
flows.json
Normal file
22
flows.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"flows": [
|
||||
[
|
||||
"https://examples.eze2e.com/login/cookie/login.php"
|
||||
],
|
||||
[
|
||||
"https://examples.eze2e.com/login/cookie/login.php",
|
||||
"https://examples.eze2e.com/login/cookie/index.php"
|
||||
],
|
||||
[
|
||||
"https://examples.eze2e.com/login/cookie/login.php",
|
||||
"https://examples.eze2e.com/login/cookie/index.php",
|
||||
"https://examples.eze2e.com/login/cookie/weather.php"
|
||||
],
|
||||
[
|
||||
"https://examples.eze2e.com/login/cookie/login.php",
|
||||
"https://examples.eze2e.com/login/cookie/index.php",
|
||||
"https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"https://examples.eze2e.com/login/cookie/logout.php"
|
||||
]
|
||||
]
|
||||
}
|
||||
77
generate-graph-data.js
Normal file
77
generate-graph-data.js
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
// generate-graph-data.js
|
||||
|
||||
import fs from "fs";
|
||||
import { buildGraphImpl } from "./methods/buildGraph.js";
|
||||
import { analyzeGraph } from "./methods/analyzeGraph.js";
|
||||
|
||||
const flows = JSON.parse(fs.readFileSync("flows.json", "utf8"));
|
||||
const statusMap = JSON.parse(fs.readFileSync("status.json", "utf8"));
|
||||
|
||||
const graph = buildGraphImpl(flows, statusMap);
|
||||
const analysis = analyzeGraph(graph);
|
||||
|
||||
fs.writeFileSync(
|
||||
"graph-data.js",
|
||||
`const graphData = ${JSON.stringify(analysis, null, 2)};`
|
||||
);
|
||||
|
||||
console.log("graph-data.js written");
|
||||
*/
|
||||
|
||||
|
||||
|
||||
//@6
|
||||
/*
|
||||
Write out to: `graph-data.js`
|
||||
Consumed by : `graph.html`
|
||||
*/
|
||||
|
||||
import fs from "fs";
|
||||
import { buildGraph } from "./graph-utils.js";
|
||||
|
||||
// Build the in-memory graph (nodes Map, edges, analysis results).
const g = buildGraph();

// Node colour by type, first match wins: form, then link, then default.
const colorFor = types => {
  if (types.has("form")) return "#cc5500";
  if (types.has("link")) return "#00aa44";
  return "#0077cc";
};

// Flatten the nodes Map into plain, JSON-serialisable objects.
const nodes = Array.from(g.nodes, ([id, info]) => ({
  id,
  label: info.label,
  baseColor: colorFor(info.types),
  fullUrl: info.fullUrl,
  cluster: info.cluster,
  types: [...info.types],
}));

// Give every edge a stable id based on its position.
const edges = g.edges.map(({ from, to, type }, i) => ({
  id: "e" + i,
  from,
  to,
  type,
}));

const pretty = value => JSON.stringify(value, null, 2);

// ----------------------------------------------------
// The emitted script declares the constants read by graph.html,
// including BROKEN_LINKS.
// ----------------------------------------------------
const out = `
const NODES = ${pretty(nodes)};
const EDGES = ${pretty(edges)};
const DEAD_ENDS = ${pretty(g.deadEnds)};
const PAGE_RANK = ${pretty(g.pageRank)};
const CYCLES = ${pretty(g.cycles)};
const CYCLES_SET = new Set(CYCLES.flat());
const TOP_RANK_THRESHOLD = ${g.rankThreshold};
const BROKEN_LINKS = ${pretty(g.brokenLinks)};
const NODE_INFO = {};
NODES.forEach(n => NODE_INFO[n.id] = n);
`;

fs.writeFileSync("graph-data.js", out);

for (const line of [
  "graph-data.js written",
  "",
  "NEXT STEPS..",
  "open graph.html (to view graph , can view in browser from file explorer too)",
]) {
  console.log(line);
}
|
||||
|
||||
198
graph-data.js
Normal file
198
graph-data.js
Normal file
@@ -0,0 +1,198 @@
|
||||
|
||||
const NODES = [
|
||||
{
|
||||
"id": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"label": "/login/cookie/login.php",
|
||||
"baseColor": "#cc5500",
|
||||
"fullUrl": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"cluster": "login",
|
||||
"types": [
|
||||
"flow",
|
||||
"page",
|
||||
"form"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"label": "/login/cookie/index.php",
|
||||
"baseColor": "#00aa44",
|
||||
"fullUrl": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"cluster": "login",
|
||||
"types": [
|
||||
"flow",
|
||||
"page",
|
||||
"link"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"label": "/login/cookie/weather.php",
|
||||
"baseColor": "#00aa44",
|
||||
"fullUrl": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"cluster": "login",
|
||||
"types": [
|
||||
"flow",
|
||||
"link",
|
||||
"page"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://examples.eze2e.com/login/cookie/logout.php",
|
||||
"label": "/login/cookie/logout.php",
|
||||
"baseColor": "#00aa44",
|
||||
"fullUrl": "https://examples.eze2e.com/login/cookie/logout.php",
|
||||
"cluster": "login",
|
||||
"types": [
|
||||
"flow",
|
||||
"link",
|
||||
"page"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://examples.eze2e.com/login/cookie/index.php#",
|
||||
"label": "/login/cookie/index.php",
|
||||
"baseColor": "#00aa44",
|
||||
"fullUrl": "https://examples.eze2e.com/login/cookie/index.php#",
|
||||
"cluster": "login",
|
||||
"types": [
|
||||
"link"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "https://examples.eze2e.com/login/cookie/weather.php#",
|
||||
"label": "/login/cookie/weather.php",
|
||||
"baseColor": "#00aa44",
|
||||
"fullUrl": "https://examples.eze2e.com/login/cookie/weather.php#",
|
||||
"cluster": "login",
|
||||
"types": [
|
||||
"link"
|
||||
]
|
||||
}
|
||||
];
|
||||
const EDGES = [
|
||||
{
|
||||
"id": "e0",
|
||||
"from": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"type": "flow"
|
||||
},
|
||||
{
|
||||
"id": "e1",
|
||||
"from": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"type": "flow"
|
||||
},
|
||||
{
|
||||
"id": "e2",
|
||||
"from": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"type": "flow"
|
||||
},
|
||||
{
|
||||
"id": "e3",
|
||||
"from": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"type": "flow"
|
||||
},
|
||||
{
|
||||
"id": "e4",
|
||||
"from": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"type": "flow"
|
||||
},
|
||||
{
|
||||
"id": "e5",
|
||||
"from": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/logout.php",
|
||||
"type": "flow"
|
||||
},
|
||||
{
|
||||
"id": "e6",
|
||||
"from": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php#",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e7",
|
||||
"from": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e8",
|
||||
"from": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/logout.php",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e9",
|
||||
"from": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php#",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e10",
|
||||
"from": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e11",
|
||||
"from": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/logout.php",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e12",
|
||||
"from": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e13",
|
||||
"from": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php#",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e14",
|
||||
"from": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/logout.php",
|
||||
"type": "link"
|
||||
},
|
||||
{
|
||||
"id": "e15",
|
||||
"from": "https://examples.eze2e.com/login/cookie/logout.php",
|
||||
"to": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"type": "form"
|
||||
}
|
||||
];
|
||||
const DEAD_ENDS = [
|
||||
"https://examples.eze2e.com/login/cookie/index.php#",
|
||||
"https://examples.eze2e.com/login/cookie/weather.php#"
|
||||
];
|
||||
const PAGE_RANK = {
|
||||
"https://examples.eze2e.com/login/cookie/login.php": 0.1007271871669504,
|
||||
"https://examples.eze2e.com/login/cookie/index.php": 0.08544952643888591,
|
||||
"https://examples.eze2e.com/login/cookie/weather.php": 0.08287660481566009,
|
||||
"https://examples.eze2e.com/login/cookie/logout.php": 0.08904887919093986,
|
||||
"https://examples.eze2e.com/login/cookie/index.php#": 0.05380999851672653,
|
||||
"https://examples.eze2e.com/login/cookie/weather.php#": 0.04261944033710667
|
||||
};
|
||||
const CYCLES = [
|
||||
[
|
||||
"https://examples.eze2e.com/login/cookie/login.php",
|
||||
"https://examples.eze2e.com/login/cookie/index.php",
|
||||
"https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"https://examples.eze2e.com/login/cookie/logout.php"
|
||||
],
|
||||
[
|
||||
"https://examples.eze2e.com/login/cookie/index.php",
|
||||
"https://examples.eze2e.com/login/cookie/weather.php"
|
||||
]
|
||||
];
|
||||
const CYCLES_SET = new Set(CYCLES.flat());
|
||||
const TOP_RANK_THRESHOLD = 0.1007271871669504;
|
||||
const BROKEN_LINKS = [];
|
||||
const NODE_INFO = {};
|
||||
NODES.forEach(n => NODE_INFO[n.id] = n);
|
||||
416
graph-utils.js
Normal file
416
graph-utils.js
Normal file
@@ -0,0 +1,416 @@
|
||||
// graph-utils.js
|
||||
//@7
|
||||
/*
|
||||
Consumes : flows.json
|
||||
: selectors.json
|
||||
: status.json
|
||||
Produces : graph object consumed by graph-data.js
|
||||
Short friendly labels enabled
|
||||
*/
|
||||
|
||||
console.log("Loading: graph-utils.js");
|
||||
|
||||
import fs from "fs";
|
||||
|
||||
/**
 * Builds the site graph from the crawler output in the current directory.
 *
 * Inputs: `./flows.json` (required), `./selectors.json` (required) and
 * `./status.json` (optional).
 *
 * @returns {{
 *   nodes: Map<string, {fullUrl: string, label: string, cluster: string, types: Set<string>}>,
 *   edges: Array<{from: string, to: string, type: string}>,
 *   deadEnds: string[],
 *   pageRank: Object<string, number>,
 *   cycles: string[][],
 *   rankThreshold: number,
 *   sitemap: Array<{url: string, cluster: string, types: string[]}>,
 *   brokenLinks: Array<{url: string, status: number, finalUrl: string}>
 * }} Graph plus derived analysis (dead ends, PageRank, cycles, broken links).
 * @throws {Error} When flows.json or selectors.json is missing or invalid JSON.
 */
export function buildGraph() {
  const flows = JSON.parse(fs.readFileSync("./flows.json", "utf8")).flows || [];
  const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));

  let status = {};
  try {
    status = JSON.parse(fs.readFileSync("./status.json", "utf8"));
  } catch (err) {
    // status.json is optional; without it no broken links are reported.
    // Anything other than "file missing" is worth a warning, though.
    if (err.code !== "ENOENT") {
      console.warn(`graph-utils: ignoring unreadable status.json: ${err.message}`);
    }
  }

  const nodes = new Map();
  const edges = [];
  const edgeKeys = new Set();

  // Node id: trimmed, lower-cased, without an empty trailing fragment ("page#")
  // and without trailing slashes. Non-empty fragments are kept because the
  // ui/section pseudo-nodes below rely on them.
  const normalize = url =>
    String(url || "")
      .trim()
      .replace(/#$/, "")
      .replace(/\/+$/, "")
      .toLowerCase();

  // Path part of an absolute URL; anything unparsable is returned unchanged.
  const shortPath = url => {
    try {
      const u = new URL(url);
      return u.pathname || "/";
    } catch {
      return url;
    }
  };

  // First path segment ("/login/x.php" -> "login"), or "root" when there is none.
  const clusterName = url => {
    const parts = url.split("/");
    return parts.length > 1 ? parts[1] : "root";
  };

  function addNode(fullUrl, type = null) {
    const id = normalize(fullUrl);
    if (!id) return null;

    if (!nodes.has(id)) {
      nodes.set(id, {
        fullUrl,
        label: shortPath(fullUrl),
        cluster: clusterName(shortPath(fullUrl)),
        types: new Set(),
      });
    }
    if (type) nodes.get(id).types.add(type);
    return id;
  }

  function addEdge(a, b, type) {
    if (!a || !b || a === b) return;
    // Every flow repeats the full path from the root, so the same hop shows up
    // once per descendant page. Keep a single edge per (from, to, type) so the
    // repeats do not inflate out-degrees and PageRank.
    const key = JSON.stringify([a, b, type]);
    if (edgeKeys.has(key)) return;
    edgeKeys.add(key);
    edges.push({ from: a, to: b, type });
  }

  // flows (navigation)
  for (const flow of flows) {
    const steps = flow.map(s => addNode(s, "flow")).filter(Boolean);
    for (let i = 0; i < steps.length - 1; i++) {
      addEdge(steps[i], steps[i + 1], "flow");
    }
  }

  // selectors (links/forms/UI/sections)
  for (const page of Object.keys(selectors)) {
    const pageId = addNode(page, "page");
    const entry = selectors[page];

    for (const link of entry.links || []) {
      addEdge(pageId, addNode(link.to, "link"), "link");
    }

    for (const form of entry.forms || []) {
      addEdge(pageId, addNode(form.action, "form"), "form");
    }

    for (const ui of entry.ui || []) {
      addEdge(pageId, addNode(`${shortPath(page)}#ui:${ui.friendly}`, "ui"), "ui");
    }

    for (const section of entry.sections || []) {
      addEdge(
        pageId,
        addNode(`${shortPath(page)}#section:${section.friendly}`, "section"),
        "section"
      );
    }
  }

  const nodeIds = [...nodes.keys()];
  const N = nodeIds.length;

  // Dead ends: nodes without any outgoing edge.
  const hasOutgoing = new Set(edges.map(e => e.from));
  const deadEnds = nodeIds.filter(id => !hasOutgoing.has(id));

  // --- PageRank: damping 0.85, 20 fixed iterations ---
  const index = new Map(nodeIds.map((id, i) => [id, i]));
  const incoming = Array.from({ length: N }, () => []);
  const outdeg = new Array(N).fill(0);

  for (const e of edges) {
    const a = index.get(e.from);
    const b = index.get(e.to);
    if (a == null || b == null) continue;
    outdeg[a]++;
    incoming[b].push(a);
  }

  const d = 0.85;
  let rank = new Array(N).fill(1 / N);

  for (let iter = 0; iter < 20; iter++) {
    const next = new Array(N).fill((1 - d) / N);
    for (let i = 0; i < N; i++) {
      for (const j of incoming[i]) {
        if (outdeg[j] > 0) next[i] += d * (rank[j] / outdeg[j]);
      }
    }
    rank = next;
  }

  const pageRank = {};
  for (let i = 0; i < N; i++) {
    pageRank[nodeIds[i]] = rank[i];
  }

  // Rank found at the 10% position of the descending order (0 for an empty graph).
  const rankThreshold =
    [...rank].sort((a, b) => b - a)[Math.floor(N * 0.1)] || 0;

  // --- Cycle detection: depth-first search, stops once more than 50 are found ---
  const adj = new Map();
  for (const id of nodeIds) adj.set(id, []);
  for (const e of edges) adj.get(e.from).push(e.to);

  const visited2 = new Set();
  const stack = new Set(); // nodes on the current DFS path
  const cycles = [];

  function dfs(node, path) {
    if (stack.has(node)) {
      // Back edge: the cycle is the part of the path starting at `node`.
      const idx = path.indexOf(node);
      if (idx !== -1) cycles.push(path.slice(idx));
      return;
    }
    if (visited2.has(node)) return;

    visited2.add(node);
    stack.add(node);
    path.push(node);

    for (const next of adj.get(node)) {
      dfs(next, path);
      if (cycles.length > 50) break;
    }

    path.pop();
    stack.delete(node);
  }

  for (const id of nodeIds) {
    if (!visited2.has(id)) dfs(id, []);
    if (cycles.length > 50) break;
  }

  const sitemap = [...nodes.values()]
    .map(n => ({
      url: n.fullUrl,
      cluster: n.cluster,
      types: [...n.types],
    }))
    .sort((a, b) => a.url.localeCompare(b.url));

  // Broken: HTTP error status, no status recorded (0), or flagged as soft 404.
  const brokenLinks = Object.entries(status)
    .filter(([, s]) => s.status >= 400 || s.status === 0 || s.soft404 === true)
    .map(([url, s]) => ({
      url: normalize(url),
      status: s.status,
      finalUrl: normalize(s.finalUrl),
    }));

  return {
    nodes,
    edges,
    deadEnds,
    pageRank,
    cycles,
    rankThreshold,
    sitemap,
    brokenLinks,
  };
}
|
||||
|
||||
|
||||
// graph-utils.js
|
||||
//@7
|
||||
/*
|
||||
Consumes : flows.json
|
||||
: selectors.json
|
||||
: status.json
|
||||
Produces : graph object consumed by graph-data.js
|
||||
console.log("Loading: graph-utils.js");
|
||||
|
||||
import fs from "fs";
|
||||
|
||||
export function buildGraph() {
|
||||
const flows = JSON.parse(fs.readFileSync("./flows.json", "utf8")).flows || [];
|
||||
const selectors = JSON.parse(fs.readFileSync("./selectors.json", "utf8"));
|
||||
let status = {};
|
||||
try {
|
||||
status = JSON.parse(fs.readFileSync("./status.json", "utf8"));
|
||||
} catch {}
|
||||
|
||||
const nodes = new Map();
|
||||
const edges = [];
|
||||
|
||||
const normalize = url =>
|
||||
String(url || "")
|
||||
.trim()
|
||||
.replace(/\/+$/, "")
|
||||
.toLowerCase();
|
||||
|
||||
const lastSegment = url => {
|
||||
const parts = url.split("/");
|
||||
return parts[parts.length - 1] || url;
|
||||
};
|
||||
|
||||
const clusterName = url => {
|
||||
const parts = url.split("/");
|
||||
return parts.length > 1 ? parts[1] : "root";
|
||||
};
|
||||
|
||||
function addNode(fullUrl, type = null) {
|
||||
const id = normalize(fullUrl);
|
||||
if (!id) return null;
|
||||
|
||||
if (!nodes.has(id)) {
|
||||
nodes.set(id, {
|
||||
fullUrl,
|
||||
label: lastSegment(id),
|
||||
cluster: clusterName(id),
|
||||
types: new Set()
|
||||
});
|
||||
}
|
||||
if (type) nodes.get(id).types.add(type);
|
||||
return id;
|
||||
}
|
||||
|
||||
function addEdge(a, b, type) {
|
||||
if (!a || !b || a === b) return;
|
||||
edges.push({ from: a, to: b, type });
|
||||
}
|
||||
|
||||
// flows (navigation)
|
||||
for (const flow of flows) {
|
||||
const steps = flow.map(s => addNode(s, "flow")).filter(Boolean);
|
||||
for (let i = 0; i < steps.length - 1; i++) {
|
||||
addEdge(steps[i], steps[i + 1], "flow");
|
||||
}
|
||||
}
|
||||
|
||||
// selectors (links/forms/UI/sections)
|
||||
for (const page of Object.keys(selectors)) {
|
||||
const pageId = addNode(page, "page");
|
||||
const entry = selectors[page];
|
||||
|
||||
for (const link of entry.links || []) {
|
||||
const to = addNode(link.to, "link");
|
||||
addEdge(pageId, to, "link");
|
||||
}
|
||||
|
||||
for (const form of entry.forms || []) {
|
||||
const to = addNode(form.action, "form");
|
||||
addEdge(pageId, to, "form");
|
||||
}
|
||||
|
||||
for (const ui of entry.ui || []) {
|
||||
const uiId = addNode(`${page}#ui:${ui.id}`, "ui");
|
||||
addEdge(pageId, uiId, "ui");
|
||||
}
|
||||
|
||||
for (const section of entry.sections || []) {
|
||||
const secId = addNode(`${page}#section:${section.id}`, "section");
|
||||
addEdge(pageId, secId, "section");
|
||||
}
|
||||
}
|
||||
|
||||
const outgoing = new Map();
|
||||
for (const e of edges) {
|
||||
outgoing.set(e.from, (outgoing.get(e.from) || 0) + 1);
|
||||
}
|
||||
const deadEnds = [...nodes.keys()].filter(n => !outgoing.has(n));
|
||||
|
||||
const nodeIds = [...nodes.keys()];
|
||||
const N = nodeIds.length;
|
||||
const index = new Map(nodeIds.map((id, i) => [id, i]));
|
||||
|
||||
const incoming = Array.from({ length: N }, () => []);
|
||||
const outdeg = new Array(N).fill(0);
|
||||
|
||||
for (const e of edges) {
|
||||
const a = index.get(e.from);
|
||||
const b = index.get(e.to);
|
||||
if (a == null || b == null) continue;
|
||||
outdeg[a]++;
|
||||
incoming[b].push(a);
|
||||
}
|
||||
|
||||
const d = 0.85;
|
||||
let rank = new Array(N).fill(1 / N);
|
||||
|
||||
for (let iter = 0; iter < 20; iter++) {
|
||||
const next = new Array(N).fill((1 - d) / N);
|
||||
for (let i = 0; i < N; i++) {
|
||||
for (const j of incoming[i]) {
|
||||
if (outdeg[j] > 0) next[i] += d * (rank[j] / outdeg[j]);
|
||||
}
|
||||
}
|
||||
rank = next;
|
||||
}
|
||||
|
||||
const pageRank = {};
|
||||
for (let i = 0; i < N; i++) {
|
||||
pageRank[nodeIds[i]] = rank[i];
|
||||
}
|
||||
|
||||
const rankThreshold =
|
||||
[...rank].sort((a, b) => b - a)[Math.floor(N * 0.1)] || 0;
|
||||
|
||||
const adj = new Map();
|
||||
for (const id of nodeIds) adj.set(id, []);
|
||||
for (const e of edges) adj.get(e.from).push(e.to);
|
||||
|
||||
const visited = new Set();
|
||||
const stack = new Set();
|
||||
const cycles = [];
|
||||
|
||||
function dfs(node, path) {
|
||||
if (stack.has(node)) {
|
||||
const idx = path.indexOf(node);
|
||||
if (idx !== -1) cycles.push(path.slice(idx));
|
||||
return;
|
||||
}
|
||||
if (visited.has(node)) return;
|
||||
|
||||
visited.add(node);
|
||||
stack.add(node);
|
||||
path.push(node);
|
||||
|
||||
for (const next of adj.get(node)) {
|
||||
dfs(next, path);
|
||||
if (cycles.length > 50) break;
|
||||
}
|
||||
|
||||
path.pop();
|
||||
stack.delete(node);
|
||||
}
|
||||
|
||||
for (const id of nodeIds) {
|
||||
if (!visited.has(id)) dfs(id, []);
|
||||
if (cycles.length > 50) break;
|
||||
}
|
||||
|
||||
const sitemap = [...nodes.values()]
|
||||
.map(n => ({
|
||||
url: n.fullUrl,
|
||||
cluster: n.cluster,
|
||||
types: [...n.types]
|
||||
}))
|
||||
.sort((a, b) => a.url.localeCompare(b.url));
|
||||
|
||||
const brokenLinks = Object.entries(status)
|
||||
.filter(([url, s]) =>
|
||||
s.status >= 400 ||
|
||||
s.status === 0 ||
|
||||
s.soft404 === true
|
||||
)
|
||||
.map(([url, s]) => ({
|
||||
url: normalize(url),
|
||||
status: s.status,
|
||||
finalUrl: normalize(s.finalUrl)
|
||||
}));
|
||||
|
||||
return {
|
||||
nodes,
|
||||
edges,
|
||||
deadEnds,
|
||||
pageRank,
|
||||
cycles,
|
||||
rankThreshold,
|
||||
sitemap,
|
||||
brokenLinks
|
||||
};
|
||||
}
|
||||
|
||||
*/
|
||||
122
graph.html
Normal file
122
graph.html
Normal file
@@ -0,0 +1,122 @@
|
||||
<!-- graph.html -->
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<title>Flow Graph Viewer</title>
|
||||
<style>
|
||||
body { margin: 0; font-family: sans-serif; display: flex; height: 100vh; }
|
||||
#sidebar {
|
||||
width: 240px; background: #f4f4f4; padding: 12px; overflow-y: auto;
|
||||
border-right: 1px solid #ccc;
|
||||
}
|
||||
#network { flex: 1; }
|
||||
h3 { margin-top: 0; }
|
||||
.node-info { font-size: 12px; white-space: pre-wrap; }
|
||||
</style>
|
||||
<script src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
|
||||
<script src="graph-data.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div id="sidebar">
|
||||
<h3>Filters</h3>
|
||||
<label><input type="checkbox" id="flow" checked> Flow edges</label><br>
|
||||
<label><input type="checkbox" id="link" checked> Link edges</label><br>
|
||||
<label><input type="checkbox" id="form" checked> Form edges</label><br>
|
||||
<label><input type="checkbox" id="dead" checked> Show dead-ends</label><br>
|
||||
<label><input type="checkbox" id="cycles"> Highlight cycles</label><br>
|
||||
<label><input type="checkbox" id="rank"> Highlight top-ranked</label><br>
|
||||
<label><input type="checkbox" id="broken"> Highlight broken links</label><br>
|
||||
|
||||
<h3>Node</h3>
|
||||
<div id="info" class="node-info"></div>
|
||||
</div>
|
||||
|
||||
<div id="network"></div>
|
||||
|
||||
<script>
|
||||
const container = document.getElementById("network");
|
||||
|
||||
const data = {
|
||||
nodes: new vis.DataSet(NODES),
|
||||
edges: new vis.DataSet(EDGES)
|
||||
};
|
||||
|
||||
const network = new vis.Network(container, data, {
|
||||
layout: { improvedLayout: true },
|
||||
physics: { stabilization: false }
|
||||
});
|
||||
|
||||
/**
 * Re-style every node and edge according to the sidebar checkboxes.
 *
 * Reads the globals NODES, EDGES, DEAD_ENDS, CYCLES_SET, PAGE_RANK,
 * BROKEN_LINKS and TOP_RANK_THRESHOLD and pushes the resulting `hidden` /
 * `color` values into the vis DataSets held in `data`.
 */
function applyFilters() {
  const isChecked = id => document.getElementById(id).checked;

  const showFlow = isChecked("flow");
  const showLink = isChecked("link");
  const showForm = isChecked("form");
  const showDead = isChecked("dead");
  const highlightCycles = isChecked("cycles");
  const highlightRank = isChecked("rank");
  const highlightBroken = isChecked("broken");

  // Build the membership sets once per call so each node is an O(1) lookup
  // instead of a linear scan of DEAD_ENDS / BROKEN_LINKS.
  const deadEnds = new Set(DEAD_ENDS);
  // BROKEN_LINKS entries are matched on `url` against the node id.
  const brokenUrls = new Set(BROKEN_LINKS.map(b => b.url));

  data.nodes.update(NODES.map(n => {
    const isDead = deadEnds.has(n.id);
    const inCycle = CYCLES_SET.has(n.id);
    const rank = PAGE_RANK[n.id] || 0;
    const isBroken = brokenUrls.has(n.id);

    // Highlight precedence: broken > cycle > top-ranked > base colour.
    let color = n.baseColor;
    if (highlightBroken && isBroken) color = "#ff0000";
    else if (highlightCycles && inCycle) color = "#ff00aa";
    else if (highlightRank && rank > TOP_RANK_THRESHOLD) color = "#ff8800";

    return {
      id: n.id,
      hidden: !showDead && isDead,
      color
    };
  }));

  // Only flow/link/form edges have a checkbox; any other edge type is
  // never hidden by this function.
  data.edges.update(EDGES.map(e => ({
    id: e.id,
    hidden:
      (e.type === "flow" && !showFlow) ||
      (e.type === "link" && !showLink) ||
      (e.type === "form" && !showForm)
  })));
}
|
||||
|
||||
document.querySelectorAll("#sidebar input").forEach(cb =>
|
||||
cb.addEventListener("change", applyFilters)
|
||||
);
|
||||
|
||||
// Show details for the clicked node in the sidebar; clear the panel when the
// click did not land on a node.
network.on("click", params => {
  const panel = document.getElementById("info");

  if (!params.nodes.length) {
    panel.textContent = "";
    return;
  }

  const id = params.nodes[0];
  const info = NODE_INFO[id];

  // A node without a NODE_INFO entry would otherwise throw on `info.fullUrl`
  // and leave stale text in the panel.
  if (!info) {
    panel.textContent = `URL: ${id}\n(no details available)`;
    return;
  }

  // BROKEN_LINKS entries are matched on `url` against the node id.
  const broken = BROKEN_LINKS.find(b => b.url === id);
  // Same fallback applyFilters uses, so a missing rank shows 0.00000
  // instead of throwing on `.toFixed`.
  const rank = PAGE_RANK[id] || 0;

  panel.textContent = [
    `URL: ${info.fullUrl}`,
    `Cluster: ${info.cluster}`,
    `Types: ${info.types.join(", ")}`,
    `PageRank: ${rank.toFixed(5)}`,
    `Dead-end: ${DEAD_ENDS.includes(id)}`,
    `In cycle: ${CYCLES_SET.has(id)}`,
    `Broken: ${broken ? broken.status : "no"}`
  ].join("\n");
});
|
||||
|
||||
applyFilters();
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
46
login-config.json
Normal file
46
login-config.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"startURL": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
|
||||
"startURL_not_used_3": "https://templates.eze2e.com",
|
||||
"startURL_not_used_4": "https://templates.eze2e.com/app",
|
||||
"startURLX": "https://www.eze2e.com",
|
||||
|
||||
"includePatterns": [
|
||||
"https://examples.eze2e.com/login/cookie"
|
||||
],
|
||||
|
||||
"XexcludePatterns": [
|
||||
"/photo-galleries",
|
||||
"/wp-content",
|
||||
"/survivors",
|
||||
"/members",
|
||||
"/yourdomain/dataprivacy",
|
||||
"/contact",
|
||||
"/forums",
|
||||
"/product-videos",
|
||||
"/contact",
|
||||
"/wp-login",
|
||||
"/log-in",
|
||||
"/logout",
|
||||
"/register-account",
|
||||
"/admin",
|
||||
"^https://example\\.com/private"
|
||||
],
|
||||
|
||||
"matcherOptions": {
|
||||
"logging": true
|
||||
} ,
|
||||
"maxDepth": 5,
|
||||
"loginConfig": {
|
||||
"logins": [
|
||||
{
|
||||
"match": "login.php",
|
||||
"fields": {
|
||||
"user": "demo",
|
||||
"pass": "password"
|
||||
},
|
||||
"submit": "login"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
558
selectors-crawler.js
Normal file
558
selectors-crawler.js
Normal file
@@ -0,0 +1,558 @@
|
||||
// selectors-crawler.js
|
||||
//@3
|
||||
/*
|
||||
Writes to : selectors.json
|
||||
Consumed by : graph-utils.js
|
||||
: test-generator.js
|
||||
Short friendly labels enabled
|
||||
*/
|
||||
|
||||
console.log("Loading: selectors-crawler.js");
|
||||
|
||||
import fs from "fs";
|
||||
import { chromium } from "playwright";
|
||||
import { normalizeUrl } from "./utils/normalizeUrl.js";
|
||||
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
|
||||
|
||||
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
|
||||
const {
|
||||
startURL,
|
||||
maxDepth,
|
||||
loginConfig,
|
||||
includePatterns = [],
|
||||
excludePatterns = [],
|
||||
matcherOptions = {}
|
||||
} = config;
|
||||
|
||||
const visited = new Set();
|
||||
const selectors = {};
|
||||
|
||||
const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);
|
||||
|
||||
const LOGOUT_KEYWORDS = ["logout", "signout", "logoff", "sign-out", "log-out"];

/**
 * Tell whether a clickable element looks like a logout control.
 *
 * @param {{text?: string, href?: string, onclick?: string}} info
 *   Element description; missing or null fields count as empty strings.
 * @returns {boolean} true when the text, href or onclick contains any
 *   LOGOUT_KEYWORDS entry (case-insensitive).
 */
function isLogoutElement(info) {
  const haystacks = [info.text, info.href, info.onclick].map(value =>
    (value || "").toLowerCase()
  );

  for (const keyword of LOGOUT_KEYWORDS) {
    if (haystacks.some(haystack => haystack.includes(keyword))) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Derive a friendly label for an element.
 *
 * The first source that yields a non-empty value wins: the element text, then
 * `dataset.tab` / `dataset.modal` / `dataset.accordion` (prefixed with the
 * key name), then the ARIA role, and finally `<tag>-<index>`.
 *
 * @param {string} text     Visible text of the element.
 * @param {string} role     Value of the `role` attribute, if any.
 * @param {object} dataset  Copy of the element's `data-*` attributes.
 * @param {string} tag      Tag name used for the fallback label.
 * @param {*} index         Identifier used for the fallback label.
 * @returns {string} The label.
 */
function friendlyLabel(text, role, dataset, tag, index) {
  // Trim, turn whitespace runs into "-", drop anything outside
  // [a-zA-Z0-9-_], and lower-case the result.
  const slugify = value => {
    const trimmed = String(value || "").trim();
    const dashed = trimmed.replace(/\s+/g, "-");
    return dashed.replace(/[^a-zA-Z0-9-_]/g, "").toLowerCase();
  };

  const textSlug = text ? slugify(text) : "";
  if (textSlug) return textSlug;

  for (const key of ["tab", "modal", "accordion"]) {
    if (dataset?.[key]) return `${key}-${slugify(dataset[key])}`;
  }

  const roleSlug = role ? slugify(role) : "";
  if (roleSlug) return roleSlug;

  return `${tag}-${index}`;
}
|
||||
|
||||
/**
 * Collect the interactive (non-link) elements on the current page.
 *
 * @param page  Object exposing `$$eval(selector, fn)`.
 * @returns {Promise<Array<object>>} One record per matched element with
 *   `id` (position in the match list), `tag`, `text`, `role`, `onclick`,
 *   `href`, `dataset` and a `friendly` label from friendlyLabel().
 */
async function getUIElements(page) {
  const selector = `
    button,
    [onclick],
    a[onclick]:not([href]),
    img[onclick],
    img[role='button'],
    img[tabindex],
    div[onclick],
    span[onclick],
    [role='button'],
    [data-toggle],
    [data-tab],
    [data-accordion]
  `;

  // The callback is handed to $$eval, so it must not reference anything
  // outside its own body.
  const elements = await page.$$eval(selector, nodes => {
    const records = [];
    nodes.forEach((node, position) => {
      records.push({
        id: position,
        tag: node.tagName.toLowerCase(),
        text: node.innerText || "",
        role: node.getAttribute("role") || null,
        onclick: node.getAttribute("onclick"),
        href: node.getAttribute("href"),
        dataset: { ...node.dataset }
      });
    });
    return records;
  });

  elements.forEach(item => {
    item.friendly = friendlyLabel(item.text, item.role, item.dataset, item.tag, item.id);
  });

  return elements;
}
|
||||
|
||||
/**
 * Collect structural regions (sections, dialogs, tab panels, modals, ...)
 * on the current page.
 *
 * @param page  Object exposing `$$eval(selector, fn)`.
 * @returns {Promise<Array<object>>} One record per matched element with
 *   `id` (the DOM id, or the position in the match list when it has none),
 *   `tag`, `text` (first 200 characters), `role`, `dataset` and a
 *   `friendly` label from friendlyLabel().
 */
async function getSections(page) {
  const selector = `
    section,
    [role='dialog'],
    [role='tabpanel'],
    [role='tablist'],
    .modal,
    [data-modal],
    [data-section],
    [data-tab-panel],
    [data-accordion-panel]
  `;

  // The callback is handed to $$eval, so it must not reference anything
  // outside its own body.
  const regions = await page.$$eval(selector, nodes => {
    const records = [];
    nodes.forEach((node, position) => {
      records.push({
        id: node.id || position,
        tag: node.tagName.toLowerCase(),
        text: (node.innerText || "").slice(0, 200),
        role: node.getAttribute("role") || null,
        dataset: { ...node.dataset }
      });
    });
    return records;
  });

  regions.forEach(item => {
    item.friendly = friendlyLabel(item.text, item.role, item.dataset, item.tag, item.id);
  });

  return regions;
}
|
||||
|
||||
/**
 * Scroll the page from top to bottom in steps, pausing 150 ms after each
 * step, then jump to the very bottom.
 *
 * The step is one fifth of the document height, but never less than 200 px.
 *
 * @param page  Object exposing `evaluate(fn)`.
 * @returns {Promise<void>}
 */
async function scrollToReveal(page) {
  await page.evaluate(async () => {
    const pause = ms => new Promise(resolve => setTimeout(resolve, ms));

    const height =
      document.body.scrollHeight || document.documentElement.scrollHeight || 0;
    const stride = Math.max(200, Math.floor(height / 5));

    let offset = 0;
    while (offset < height) {
      window.scrollTo(0, offset);
      await pause(150);
      offset += stride;
    }

    window.scrollTo(0, height);
  });
}
|
||||
|
||||
/**
 * Click every interactive element on the current page and crawl any links
 * that only appear after a click.
 *
 * Newly revealed links are collected during the click pass and crawled only
 * after every element has been probed. crawl() navigates `page`, so crawling
 * inside the loop would make the remaining xpaths run against another page.
 *
 * Failures are best-effort: a failing click probe or crawl is logged with
 * console.warn and the pass continues.
 *
 * @param page            Playwright page positioned on `url`.
 * @param {string} url    URL being probed (used in log messages only).
 * @param {number} depth  Depth of `url`; revealed links are crawled at depth + 1.
 * @returns {Promise<void>}
 */
async function discoverByClicking(page, url, depth) {
  await scrollToReveal(page);

  // Only the fields read below (logout check + xpath) are collected.
  const clickables = await page.$$eval(
    `
    button,
    [onclick],
    a[onclick]:not([href]),
    img[onclick],
    img[role='button'],
    img[tabindex],
    div[onclick],
    span[onclick],
    [role='button'],
    [data-toggle],
    [data-tab],
    [data-accordion]
    `,
    els =>
      els.map(el => ({
        text: el.innerText || "",
        href: el.getAttribute("href"),
        onclick: el.getAttribute("onclick"),
        // Absolute positional xpath (/HTML[1]/BODY[1]/...), built by walking
        // up the parents and counting same-tag preceding siblings.
        xpath: (() => {
          let path = "";
          let current = el;
          while (current && current.nodeType === 1) {
            let idx = 1;
            let sibling = current.previousElementSibling;
            while (sibling) {
              if (sibling.tagName === current.tagName) idx++;
              sibling = sibling.previousElementSibling;
            }
            path = `/${current.tagName}[${idx}]` + path;
            current = current.parentElement;
          }
          return path;
        })()
      }))
  );

  const collectHrefs = () =>
    page.$$eval("a[href]", as => as.map(a => a.href));

  // Normalized URLs revealed by any click, in discovery order, de-duplicated.
  const discovered = new Set();

  for (const info of clickables) {
    // Never click logout controls.
    if (isLogoutElement(info)) continue;

    try {
      const beforeLinks = new Set(await collectHrefs());

      await page.evaluate(xpath => {
        const result = document.evaluate(
          xpath,
          document,
          null,
          XPathResult.FIRST_ORDERED_NODE_TYPE,
          null
        );
        const el = result.singleNodeValue;
        if (el) el.click();
      }, info.xpath);

      // Give click handlers time to update the DOM.
      await page.waitForTimeout(400);

      for (const link of await collectHrefs()) {
        if (beforeLinks.has(link)) continue;
        const normalized = normalizeUrl(link);
        if (normalized) discovered.add(normalized);
      }
    } catch (err) {
      console.warn(
        `discoverByClicking: click probe failed on ${url} (${info.xpath}):`,
        err?.message ?? err
      );
    }
  }

  for (const link of discovered) {
    try {
      await crawl(page, link, depth + 1);
    } catch (err) {
      console.warn(
        `discoverByClicking: crawl of ${link} failed:`,
        err?.message ?? err
      );
    }
  }
}
|
||||
|
||||
/**
 * Fill in and submit a login form when `url` matches a configured rule.
 *
 * Each rule in `loginConfig.logins` has a `match` substring, a `fields` map
 * (input name -> value) and the `name` of the submit control. Every rule
 * whose `match` occurs in `url` is applied in order. A config without
 * `loginConfig` (or without `logins`) means no login is attempted.
 *
 * @param page          Playwright page already navigated to `url`.
 * @param {string} url  URL that was just loaded.
 * @returns {Promise<void>}
 */
async function performLoginIfNeeded(page, url) {
  // loginConfig is optional: without it there is nothing to log in to.
  const rules = loginConfig?.logins ?? [];

  for (const rule of rules) {
    if (!url.includes(rule.match)) continue;

    for (const [name, value] of Object.entries(rule.fields ?? {})) {
      const sel = `input[name="${name}"]`;
      await page.waitForSelector(sel);
      await page.fill(sel, value);
    }

    const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
    await page.waitForSelector(submitSel);
    await page.click(submitSel);

    // Wait for the post-login navigation to settle before scraping.
    await page.waitForLoadState("networkidle");
  }
}
|
||||
|
||||
/**
 * Visit one URL, record its forms, links, UI elements and sections in
 * `selectors`, then recurse into click-revealed links and plain links.
 *
 * A URL is skipped when it is deeper than `maxDepth`, rejected by `matcher`,
 * or already in `visited`.
 *
 * @param page             Playwright page reused for the whole crawl.
 * @param {string} rawUrl  URL to visit; normalized before use.
 * @param {number} depth   Depth of this URL (the start URL is 0).
 * @returns {Promise<void>}
 */
async function crawl(page, rawUrl, depth) {
  const url = normalizeUrl(rawUrl);

  const shouldSkip =
    depth > maxDepth || !matcher.allow(url) || visited.has(url);
  if (shouldSkip) return;

  visited.add(url);

  await page.goto(url, { waitUntil: "networkidle" });
  await performLoginIfNeeded(page, url);

  const forms = await page.$$eval("form", formEls =>
    formEls.map(formEl => {
      const fields = [...formEl.querySelectorAll("input, textarea, select")];
      return {
        action: formEl.action,
        inputs: fields.map(field => ({
          name: field.name,
          tag: field.tagName.toLowerCase(),
          type: field.type || null
        }))
      };
    })
  );

  // Absolute hrefs only; anything not starting with "http" is dropped.
  const links = await page.$$eval("a[href]", anchors => {
    const hrefs = [];
    for (const anchor of anchors) {
      const href = anchor.href;
      if (href.startsWith("http")) hrefs.push(href);
    }
    return hrefs;
  });

  const ui = await getUIElements(page);
  const sections = await getSections(page);

  // Recorded under the requested URL, before any clicking changes the DOM.
  const linkRecords = links.map(href => ({ to: href }));
  selectors[url] = { forms, links: linkRecords, ui, sections };

  await discoverByClicking(page, url, depth);

  const targets = links.map(href => normalizeUrl(href)).filter(Boolean);
  for (const target of targets) {
    await crawl(page, target, depth + 1);
  }
}
|
||||
|
||||
// Entry point: crawl from startURL and write the collected selectors to
// selectors.json. The browser is closed even when the crawl throws, and a
// failure is reported with a non-zero exit code.
(async () => {
  const browser = await chromium.launch({ headless: false });

  try {
    const page = await browser.newPage();

    await crawl(page, startURL, 0);

    fs.writeFileSync("selectors.json", JSON.stringify(selectors, null, 2));
  } finally {
    // Always release the browser process, success or failure.
    await browser.close();
  }
})().catch(err => {
  console.error("selectors-crawler failed:", err);
  process.exitCode = 1;
});
|
||||
|
||||
|
||||
// selectors-crawler.js
|
||||
//@3
|
||||
/*
|
||||
Writes to : selectors.json
|
||||
Consumed by : graph-utils.js
|
||||
: test-generator.js
|
||||
|
||||
console.log("Loading: selectors-crawler.js");
|
||||
|
||||
import fs from "fs";
|
||||
import { chromium } from "playwright";
|
||||
import { normalizeUrl } from "./utils/normalizeUrl.js";
|
||||
import { UrlPatternMatcher } from "./utils/urlPatternMatcher.js";
|
||||
|
||||
const config = JSON.parse(fs.readFileSync("./login-config.json", "utf8"));
|
||||
const {
|
||||
startURL,
|
||||
maxDepth,
|
||||
loginConfig,
|
||||
includePatterns = [],
|
||||
excludePatterns = [],
|
||||
matcherOptions = {}
|
||||
} = config;
|
||||
|
||||
const visited = new Set();
|
||||
const selectors = {};
|
||||
|
||||
const matcher = new UrlPatternMatcher(includePatterns, excludePatterns, matcherOptions);
|
||||
|
||||
const LOGOUT_KEYWORDS = ["logout", "signout", "logoff", "sign-out", "log-out"];
|
||||
|
||||
function isLogoutElement(info) {
|
||||
const text = (info.text || "").toLowerCase();
|
||||
const href = (info.href || "").toLowerCase();
|
||||
const onclick = (info.onclick || "").toLowerCase();
|
||||
return LOGOUT_KEYWORDS.some(k =>
|
||||
text.includes(k) || href.includes(k) || onclick.includes(k)
|
||||
);
|
||||
}
|
||||
|
||||
async function getUIElements(page) {
|
||||
return await page.$$eval(
|
||||
`
|
||||
button,
|
||||
[onclick],
|
||||
a[onclick]:not([href]),
|
||||
img[onclick],
|
||||
img[role='button'],
|
||||
img[tabindex],
|
||||
div[onclick],
|
||||
span[onclick],
|
||||
[role='button'],
|
||||
[data-toggle],
|
||||
[data-tab],
|
||||
[data-accordion]
|
||||
`,
|
||||
els =>
|
||||
els.map((el, index) => ({
|
||||
id: `ui-${index}`,
|
||||
tag: el.tagName.toLowerCase(),
|
||||
text: el.innerText || "",
|
||||
role: el.getAttribute("role") || null,
|
||||
onclick: el.getAttribute("onclick"),
|
||||
href: el.getAttribute("href"),
|
||||
dataset: { ...el.dataset }
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
async function getSections(page) {
|
||||
return await page.$$eval(
|
||||
`
|
||||
section,
|
||||
[role='dialog'],
|
||||
[role='tabpanel'],
|
||||
[role='tablist'],
|
||||
.modal,
|
||||
[data-modal],
|
||||
[data-section],
|
||||
[data-tab-panel],
|
||||
[data-accordion-panel]
|
||||
`,
|
||||
els =>
|
||||
els.map((el, index) => ({
|
||||
id: el.id || `section-${index}`,
|
||||
tag: el.tagName.toLowerCase(),
|
||||
text: (el.innerText || "").slice(0, 200),
|
||||
role: el.getAttribute("role") || null,
|
||||
dataset: { ...el.dataset }
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
async function scrollToReveal(page) {
|
||||
await page.evaluate(async () => {
|
||||
const total =
|
||||
document.body.scrollHeight || document.documentElement.scrollHeight || 0;
|
||||
const step = Math.max(200, Math.floor(total / 5));
|
||||
for (let y = 0; y < total; y += step) {
|
||||
window.scrollTo(0, y);
|
||||
await new Promise(r => setTimeout(r, 150));
|
||||
}
|
||||
window.scrollTo(0, total);
|
||||
});
|
||||
}
|
||||
|
||||
async function discoverByClicking(page, url, depth) {
|
||||
await scrollToReveal(page);
|
||||
|
||||
const clickables = await page.$$eval(
|
||||
`
|
||||
button,
|
||||
[onclick],
|
||||
a[onclick]:not([href]),
|
||||
img[onclick],
|
||||
img[role='button'],
|
||||
img[tabindex],
|
||||
div[onclick],
|
||||
span[onclick],
|
||||
[role='button'],
|
||||
[data-toggle],
|
||||
[data-tab],
|
||||
[data-accordion]
|
||||
`,
|
||||
els =>
|
||||
els.map(el => ({
|
||||
text: el.innerText || "",
|
||||
href: el.getAttribute("href"),
|
||||
onclick: el.getAttribute("onclick"),
|
||||
xpath: (() => {
|
||||
let path = "";
|
||||
let current = el;
|
||||
while (current && current.nodeType === 1) {
|
||||
let index = 1;
|
||||
let sibling = current.previousElementSibling;
|
||||
while (sibling) {
|
||||
if (sibling.tagName === current.tagName) index++;
|
||||
sibling = sibling.previousElementSibling;
|
||||
}
|
||||
path = `/${current.tagName}[${index}]` + path;
|
||||
current = current.parentElement;
|
||||
}
|
||||
return path;
|
||||
})()
|
||||
}))
|
||||
);
|
||||
|
||||
for (const info of clickables) {
|
||||
if (isLogoutElement(info)) continue;
|
||||
|
||||
try {
|
||||
const beforeLinks = new Set(
|
||||
await page.$$eval("a[href]", as => as.map(a => a.href))
|
||||
);
|
||||
|
||||
await page.evaluate(xpath => {
|
||||
const getNode = xp => {
|
||||
const result = document.evaluate(
|
||||
xp,
|
||||
document,
|
||||
null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE,
|
||||
null
|
||||
);
|
||||
return result.singleNodeValue;
|
||||
};
|
||||
const el = getNode(xpath);
|
||||
if (el) el.click();
|
||||
}, info.xpath);
|
||||
|
||||
await page.waitForTimeout(400);
|
||||
|
||||
const afterLinks = await page.$$eval("a[href]", as =>
|
||||
as.map(a => a.href)
|
||||
);
|
||||
|
||||
for (const link of afterLinks) {
|
||||
if (!beforeLinks.has(link)) {
|
||||
const normalized = normalizeUrl(link);
|
||||
if (normalized) await crawl(page, normalized, depth + 1);
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
|
||||
async function performLoginIfNeeded(page, url) {
|
||||
for (const rule of loginConfig.logins) {
|
||||
if (url.includes(rule.match)) {
|
||||
for (const [name, value] of Object.entries(rule.fields)) {
|
||||
const sel = `input[name="${name}"]`;
|
||||
await page.waitForSelector(sel);
|
||||
await page.fill(sel, value);
|
||||
}
|
||||
|
||||
const submitSel = `[name="${rule.submit}"], button[name="${rule.submit}"]`;
|
||||
await page.waitForSelector(submitSel);
|
||||
await page.click(submitSel);
|
||||
|
||||
await page.waitForLoadState("networkidle");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function crawl(page, rawUrl, depth) {
|
||||
const url = normalizeUrl(rawUrl);
|
||||
|
||||
if (depth > maxDepth) return;
|
||||
if (!matcher.allow(url)) return;
|
||||
if (visited.has(url)) return;
|
||||
|
||||
visited.add(url);
|
||||
|
||||
await page.goto(url, { waitUntil: "networkidle" });
|
||||
await performLoginIfNeeded(page, url);
|
||||
|
||||
const forms = await page.$$eval("form", forms =>
|
||||
forms.map(f => ({
|
||||
action: f.action,
|
||||
inputs: Array.from(f.querySelectorAll("input, textarea, select")).map(i => ({
|
||||
name: i.name,
|
||||
tag: i.tagName.toLowerCase(),
|
||||
type: i.type || null
|
||||
}))
|
||||
}))
|
||||
);
|
||||
|
||||
const links = await page.$$eval("a[href]", as =>
|
||||
as.map(a => a.href).filter(h => h.startsWith("http"))
|
||||
);
|
||||
|
||||
const ui = await getUIElements(page);
|
||||
const sections = await getSections(page);
|
||||
|
||||
selectors[url] = {
|
||||
forms,
|
||||
links: links.map(href => ({ to: href })),
|
||||
ui,
|
||||
sections
|
||||
};
|
||||
|
||||
await discoverByClicking(page, url, depth);
|
||||
|
||||
const normalizedLinks = links
|
||||
.map(h => normalizeUrl(h))
|
||||
.filter(h => h);
|
||||
|
||||
for (const link of normalizedLinks) {
|
||||
await crawl(page, link, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
(async () => {
|
||||
const browser = await chromium.launch({ headless: false });
|
||||
const page = await browser.newPage();
|
||||
|
||||
await crawl(page, startURL, 0);
|
||||
|
||||
fs.writeFileSync("selectors.json", JSON.stringify(selectors, null, 2));
|
||||
await browser.close();
|
||||
})();
|
||||
*/
|
||||
77
selectors.json
Normal file
77
selectors.json
Normal file
@@ -0,0 +1,77 @@
|
||||
{
|
||||
"https://examples.eze2e.com/login/cookie/login.php": {
|
||||
"forms": [],
|
||||
"links": [
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php#"
|
||||
},
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php"
|
||||
},
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/logout.php"
|
||||
}
|
||||
],
|
||||
"ui": [],
|
||||
"sections": []
|
||||
},
|
||||
"https://examples.eze2e.com/login/cookie/index.php": {
|
||||
"forms": [],
|
||||
"links": [
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php#"
|
||||
},
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php"
|
||||
},
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/logout.php"
|
||||
}
|
||||
],
|
||||
"ui": [],
|
||||
"sections": []
|
||||
},
|
||||
"https://examples.eze2e.com/login/cookie/weather.php": {
|
||||
"forms": [],
|
||||
"links": [
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/index.php"
|
||||
},
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/weather.php#"
|
||||
},
|
||||
{
|
||||
"to": "https://examples.eze2e.com/login/cookie/logout.php"
|
||||
}
|
||||
],
|
||||
"ui": [],
|
||||
"sections": []
|
||||
},
|
||||
"https://examples.eze2e.com/login/cookie/logout.php": {
|
||||
"forms": [
|
||||
{
|
||||
"action": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"inputs": [
|
||||
{
|
||||
"name": "user",
|
||||
"tag": "input",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"name": "pass",
|
||||
"tag": "input",
|
||||
"type": "password"
|
||||
},
|
||||
{
|
||||
"name": "login",
|
||||
"tag": "input",
|
||||
"type": "submit"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"links": [],
|
||||
"ui": [],
|
||||
"sections": []
|
||||
}
|
||||
}
|
||||
22
status.json
Normal file
22
status.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"https://examples.eze2e.com/login/cookie/login.php": {
|
||||
"status": 200,
|
||||
"finalUrl": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"soft404": false
|
||||
},
|
||||
"https://examples.eze2e.com/login/cookie/index.php": {
|
||||
"status": 200,
|
||||
"finalUrl": "https://examples.eze2e.com/login/cookie/index.php",
|
||||
"soft404": false
|
||||
},
|
||||
"https://examples.eze2e.com/login/cookie/weather.php": {
|
||||
"status": 200,
|
||||
"finalUrl": "https://examples.eze2e.com/login/cookie/weather.php",
|
||||
"soft404": false
|
||||
},
|
||||
"https://examples.eze2e.com/login/cookie/logout.php": {
|
||||
"status": 200,
|
||||
"finalUrl": "https://examples.eze2e.com/login/cookie/login.php",
|
||||
"soft404": false
|
||||
}
|
||||
}
|
||||
50
test-executor.js
Normal file
50
test-executor.js
Normal file
@@ -0,0 +1,50 @@
|
||||
//@5
|
||||
/*
|
||||
Consumes : `action-tests.json`
|
||||
Writes to : [console]
|
||||
*/
|
||||
|
||||
// test-executor.js
|
||||
console.log("Loading: test-executor.js");
|
||||
|
||||
import fs from "fs";
|
||||
import { chromium } from "playwright";
|
||||
|
||||
const tests = JSON.parse(fs.readFileSync("./action-tests.json", "utf8")).actionTests;
|
||||
|
||||
/**
 * Strip a leading "crawl_" marker from string values.
 *
 * @param {*} value  Value taken from an action test.
 * @returns {*} The string without its "crawl_" prefix, or `value` unchanged
 *   when it is not a string or has no such prefix.
 */
function normalizeValue(value) {
  const hasCrawlPrefix =
    typeof value === "string" && value.startsWith("crawl_");

  return hasCrawlPrefix ? value.replace(/^crawl_/, "") : value;
}
|
||||
|
||||
// Entry point: run every action test in order against one shared page.
// Each test is isolated in its own try/catch so one failure does not stop
// the run; the browser is closed even if something outside that throws.
(async () => {
  const browser = await chromium.launch({ headless: false });

  try {
    const page = await browser.newPage();

    // A file without an "actionTests" key is treated as "no tests".
    for (const test of tests ?? []) {
      try {
        await page.goto(test.url, { waitUntil: "networkidle" });

        if (test.type === "input") {
          const value = normalizeValue(test.value);
          await page.waitForSelector(test.selector);
          await page.fill(test.selector, value);
        }

        if (test.type === "click") {
          await page.waitForSelector(test.selector);
          await page.click(test.selector);
        }

        console.log("✓", test.type.toUpperCase(), test.selector);
      } catch (err) {
        console.error("✗ Test failed:", test, err);
      }
    }
  } finally {
    await browser.close();
  }
})();
|
||||
|
||||
87
test-generator.js
Normal file
87
test-generator.js
Normal file
@@ -0,0 +1,87 @@
|
||||
// test-generator.js
|
||||
//@5
|
||||
/*
|
||||
Reads from : flows.json
|
||||
Writes to : tests/
|
||||
Short friendly labels enabled
|
||||
*/
|
||||
|
||||
console.log("Loading: test-generator.js");
|
||||
|
||||
import fs from "fs";
|
||||
|
||||
// Read the recorded flows and emit one Playwright spec per flow into tests/.
const data = JSON.parse(fs.readFileSync("./flows.json", "utf8"));
const flows = data.flows || [];

// `recursive: true` makes this a no-op when the directory already exists.
fs.mkdirSync("tests", { recursive: true });

let counter = 1;

for (const flow of flows) {
  const testName = `test-${counter}.spec.js`;

  // JSON.stringify emits a correctly escaped, double-quoted literal, so a URL
  // containing quotes, backslashes or newlines cannot break (or inject code
  // into) the generated spec. Ordinary URLs produce the same text as before.
  const steps = flow
    .map(url =>
      `  await page.goto(${JSON.stringify(String(url))}, { waitUntil: "networkidle" });`
    )
    .join("\n");

  const content = [
    'import { test } from "@playwright/test";',
    "",
    `test("Flow ${counter}", async ({ page }) => {`,
    steps,
    "});",
    ""
  ].join("\n");

  fs.writeFileSync(`tests/${testName}`, content);
  counter++;
}

console.log("Generated tests/");
|
||||
|
||||
|
||||
// test-generator.js
|
||||
//@5
|
||||
/*
|
||||
Reads from : flows.json
|
||||
Writes to : tests/
|
||||
|
||||
console.log("Loading: test-generator.js");
|
||||
|
||||
import fs from "fs";
|
||||
|
||||
const data = JSON.parse(fs.readFileSync("./flows.json", "utf8"));
|
||||
const flows = data.flows || [];
|
||||
|
||||
if (!fs.existsSync("tests")) fs.mkdirSync("tests");
|
||||
|
||||
let counter = 1;
|
||||
|
||||
for (const flow of flows) {
|
||||
const testName = `test-${counter}.spec.js`;
|
||||
|
||||
const steps = flow
|
||||
.map((url, i) => {
|
||||
if (i === 0) {
|
||||
return ` await page.goto("${url}", { waitUntil: "networkidle" });`;
|
||||
}
|
||||
return ` await page.goto("${url}", { waitUntil: "networkidle" });`;
|
||||
})
|
||||
.join("\n");
|
||||
|
||||
const content = `
|
||||
import { test } from "@playwright/test";
|
||||
|
||||
test("Flow ${counter}", async ({ page }) => {
|
||||
${steps}
|
||||
});
|
||||
`.trimStart();
|
||||
|
||||
fs.writeFileSync(`tests/${testName}`, content);
|
||||
counter++;
|
||||
}
|
||||
|
||||
console.log("Generated tests/");
|
||||
|
||||
*/
|
||||
5
tests/test-1.spec.js
Normal file
5
tests/test-1.spec.js
Normal file
@@ -0,0 +1,5 @@
|
||||
import { test } from "@playwright/test";
|
||||
|
||||
// Flow 1: load the login page and wait for the network to go idle.
test("Flow 1", async ({ page }) => {
  const loginUrl = "https://examples.eze2e.com/login/cookie/login.php";
  await page.goto(loginUrl, { waitUntil: "networkidle" });
});
|
||||
6
tests/test-2.spec.js
Normal file
6
tests/test-2.spec.js
Normal file
@@ -0,0 +1,6 @@
|
||||
import { test } from "@playwright/test";
|
||||
|
||||
// Flow 2: visit each page in order, waiting for network idle after every hop.
test("Flow 2", async ({ page }) => {
  const visitOrder = [
    "https://examples.eze2e.com/login/cookie/login.php",
    "https://examples.eze2e.com/login/cookie/index.php",
  ];

  for (const url of visitOrder) {
    await page.goto(url, { waitUntil: "networkidle" });
  }
});
|
||||
7
tests/test-3.spec.js
Normal file
7
tests/test-3.spec.js
Normal file
@@ -0,0 +1,7 @@
|
||||
import { test } from "@playwright/test";
|
||||
|
||||
// Flow 3: visit each page in order, waiting for network idle after every hop.
test("Flow 3", async ({ page }) => {
  const visitOrder = [
    "https://examples.eze2e.com/login/cookie/login.php",
    "https://examples.eze2e.com/login/cookie/index.php",
    "https://examples.eze2e.com/login/cookie/weather.php",
  ];

  for (const url of visitOrder) {
    await page.goto(url, { waitUntil: "networkidle" });
  }
});
|
||||
8
tests/test-4.spec.js
Normal file
8
tests/test-4.spec.js
Normal file
@@ -0,0 +1,8 @@
|
||||
import { test } from "@playwright/test";
|
||||
|
||||
// Flow 4: visit each page in order, waiting for network idle after every hop.
test("Flow 4", async ({ page }) => {
  const visitOrder = [
    "https://examples.eze2e.com/login/cookie/login.php",
    "https://examples.eze2e.com/login/cookie/index.php",
    "https://examples.eze2e.com/login/cookie/weather.php",
    "https://examples.eze2e.com/login/cookie/logout.php",
  ];

  for (const url of visitOrder) {
    await page.goto(url, { waitUntil: "networkidle" });
  }
});
|
||||
51
utils/normalizeUrl.js
Normal file
51
utils/normalizeUrl.js
Normal file
@@ -0,0 +1,51 @@
|
||||
// utils/normalizeUrl.js
|
||||
|
||||
/**
 * Produces a canonical string form of an absolute URL.
 *
 * The input is trimmed, its fragment (everything from the first "#") is
 * dropped, scheme and hostname are lower-cased, and a single trailing slash
 * is removed from any path other than "/".
 *
 * @param {string} raw - Candidate URL text.
 * @returns {string|null} The normalized URL, or null when the input is
 *   falsy, uses a javascript:/mailto:/tel:/data: scheme, or cannot be parsed
 *   as an absolute URL (any error raised while normalizing yields null).
 */
export function normalizeUrl(raw) {
  if (!raw) {
    return null;
  }

  try {
    const candidate = raw.trim();

    if (/^(javascript:|mailto:|tel:|data:)/i.test(candidate)) {
      return null;
    }

    // Cut the fragment off before parsing.
    const hashAt = candidate.indexOf("#");
    const withoutFragment = hashAt === -1 ? candidate : candidate.slice(0, hashAt);

    const parsed = new URL(withoutFragment);
    parsed.protocol = parsed.protocol.toLowerCase();
    parsed.hostname = parsed.hostname.toLowerCase();

    const { pathname } = parsed;
    if (pathname !== "/" && pathname.endsWith("/")) {
      parsed.pathname = pathname.slice(0, -1);
    }

    return parsed.href;
  } catch {
    return null;
  }
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
// utils/normalizeUrl.js
|
||||
console.log("Loading: utils/normalizeUrl.js");
|
||||
export function normalizeUrl(rawUrl) {
|
||||
try {
|
||||
const url = new URL(rawUrl);
|
||||
url.hash = "";
|
||||
url.hostname = url.hostname.toLowerCase();
|
||||
if (url.pathname !== "/" && url.pathname.endsWith("/")) {
|
||||
url.pathname = url.pathname.slice(0, -1);
|
||||
}
|
||||
if (url.searchParams && [...url.searchParams].length > 1) {
|
||||
const sorted = [...url.searchParams.entries()].sort();
|
||||
url.search = "";
|
||||
for (const [k, v] of sorted) url.searchParams.append(k, v);
|
||||
}
|
||||
return url.toString();
|
||||
} catch {
|
||||
return rawUrl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
136
utils/urlPatternMatcher.js
Normal file
136
utils/urlPatternMatcher.js
Normal file
@@ -0,0 +1,136 @@
|
||||
// utils/urlPatternMatcher.js
|
||||
|
||||
/**
 * Decides whether a URL is allowed, based on include and exclude patterns.
 *
 * A pattern that starts with "^" is treated as a regular expression; any
 * other pattern is a plain substring test. Exclude patterns are checked
 * first and win; after that the URL must match at least one include pattern,
 * so an empty include list allows nothing.
 */
export class UrlPatternMatcher {
  /**
   * @param {string[]} [includePatterns=[]] - Patterns a URL must match (at least one).
   * @param {string[]} [excludePatterns=[]] - Patterns that reject a URL outright.
   * @param {{ debug?: boolean }} [options={}] - `debug: true` enables console logging.
   */
  constructor(includePatterns = [], excludePatterns = [], options = {}) {
    this.include = includePatterns;
    this.exclude = excludePatterns;
    this.debug = options.debug || false;
    // pattern string -> compiled RegExp, or null when the pattern is invalid.
    // Avoids recompiling the same pattern for every URL that is checked.
    this.regexCache = new Map();
  }

  /** Writes a prefixed message to the console when debug logging is on. */
  log(...args) {
    if (this.debug) console.log("[UrlPatternMatcher]", ...args);
  }

  /**
   * @param {string} url - URL to check.
   * @returns {boolean} true when no exclude pattern matches and at least one
   *   include pattern does.
   */
  allow(url) {
    // match() never succeeds for a falsy pattern, so `undefined` reliably
    // means "no exclude pattern matched".
    const blockedBy = this.exclude.find((pat) => this.match(url, pat));
    if (blockedBy !== undefined) {
      this.log("EXCLUDE:", blockedBy, "→", url);
      return false;
    }

    if (!this.include.some((pat) => this.match(url, pat))) {
      this.log("NO INCLUDE:", url);
      return false;
    }

    this.log("ALLOW:", url);
    return true;
  }

  /**
   * Tests one URL against one pattern.
   *
   * @param {string} url - URL to test.
   * @param {string} pattern - Regex source (when it starts with "^") or substring.
   * @returns {boolean} Whether the pattern matches; false for an empty or
   *   missing pattern and for a regex pattern that does not compile.
   */
  match(url, pattern) {
    if (!pattern) return false;

    if (pattern.startsWith("^")) {
      const re = this.compilePattern(pattern);
      if (re === null) return false;

      const ok = re.test(url);
      this.log("REGEX:", pattern, "→", ok);
      return ok;
    }

    const ok = url.includes(pattern);
    this.log("SUBSTR:", `"${pattern}" in "${url}" →`, ok);
    return ok;
  }

  /**
   * Returns the cached RegExp for a pattern, compiling it on first use.
   *
   * @param {string} pattern - Regex source text.
   * @returns {RegExp|null} The compiled expression, or null if it is invalid.
   */
  compilePattern(pattern) {
    if (this.regexCache.has(pattern)) {
      return this.regexCache.get(pattern);
    }

    let compiled = null;
    try {
      compiled = new RegExp(pattern);
    } catch (err) {
      // An invalid pattern never matches; surface it once in debug output
      // instead of failing silently.
      this.log("INVALID REGEX:", pattern, "→", err.message);
    }

    this.regexCache.set(pattern, compiled);
    return compiled;
  }
}
|
||||
|
||||
|
||||
/*
|
||||
console.log("Loading: utils/urlPatternMatcher.js");
|
||||
|
||||
export class UrlPatternMatcher {
|
||||
constructor(includePatterns = [], excludePatterns = [], options = {}) {
|
||||
this.includePatterns = includePatterns;
|
||||
this.excludePatterns = excludePatterns;
|
||||
|
||||
this.options = {
|
||||
logging: false,
|
||||
...options
|
||||
};
|
||||
}
|
||||
|
||||
log(...args) {
|
||||
if (this.options.logging) {
|
||||
console.log("[UrlPatternMatcher]", ...args);
|
||||
}
|
||||
}
|
||||
|
||||
isRegex(pattern) {
|
||||
return (
|
||||
pattern.startsWith("^") ||
|
||||
pattern.endsWith("$") ||
|
||||
pattern.includes(".*") ||
|
||||
pattern.includes("\\")
|
||||
);
|
||||
}
|
||||
|
||||
match(url, pattern) {
|
||||
if (this.isRegex(pattern)) {
|
||||
const re = new RegExp(pattern);
|
||||
const ok = re.test(url);
|
||||
this.log(`Regex test: ${pattern} → ${ok}`);
|
||||
return ok;
|
||||
}
|
||||
|
||||
const ok = url.includes(pattern);
|
||||
this.log(`Substring test: "${pattern}" in "${url}" → ${ok}`);
|
||||
return ok;
|
||||
}
|
||||
|
||||
matchesAny(url, patterns) {
|
||||
return patterns.some(p => this.match(url, p));
|
||||
}
|
||||
|
||||
shouldExclude(url) {
|
||||
const excluded = this.matchesAny(url, this.excludePatterns);
|
||||
if (excluded) {
|
||||
this.log(`EXCLUDED: ${url}`);
|
||||
}
|
||||
return excluded;
|
||||
}
|
||||
|
||||
shouldInclude(url) {
|
||||
if (!this.includePatterns.length) {
|
||||
this.log(`Included by default (no includePatterns): ${url}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
const included = this.matchesAny(url, this.includePatterns);
|
||||
this.log(`Include check for ${url} → ${included}`);
|
||||
return included;
|
||||
}
|
||||
|
||||
allow(url) {
|
||||
if (this.shouldExclude(url)) {
|
||||
this.log(`Final decision: BLOCKED → ${url}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
const allowed = this.shouldInclude(url);
|
||||
this.log(`Final decision: ${allowed ? "ALLOW" : "BLOCK"} → ${url}`);
|
||||
return allowed;
|
||||
}
|
||||
}
|
||||
*/
|
||||
Reference in New Issue
Block a user