Skip to content

Commit 6e390ee

Browse files
committed
[scramjet/core] detect and properly handle quirky document structure (avoid scripts running before scramjet client)
1 parent 3e800a3 commit 6e390ee

File tree

1 file changed

+72
-16
lines changed
  • packages/scramjet/packages/core/src/shared/rewriters

1 file changed

+72
-16
lines changed

packages/scramjet/packages/core/src/shared/rewriters/html.ts

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,30 +25,86 @@ function rewriteHtmlInner(
2525
if (preRewrite) preRewrite(handler);
2626
traverseParsedHtml(handler.root, cookieJar, meta);
2727

28-
function findhead(node) {
29-
if (node.type === ElementType.Tag && node.name === "head") {
30-
return node as Element;
31-
} else if (node.childNodes) {
32-
for (const child of node.childNodes) {
33-
const head = findhead(child);
34-
if (head) return head;
28+
let htmlRoot: Element | undefined;
29+
let headElement: Element | undefined;
30+
let bodyElement: Element | undefined;
31+
32+
/**
 * Inspects the top of the parsed document and reports whether its structure
 * is "quirky", i.e. whether something other than the expected
 * `<html>` → `<head>` → `<body>` layout is present, which could let page
 * scripts run before the injected client scripts.
 *
 * Side effects: records the elements it finds in the enclosing `htmlRoot`,
 * `headElement` and `bodyElement` variables. When it returns `true` early,
 * those variables may be only partially populated.
 *
 * @returns `true` if the document is quirky, `false` if every top-level node
 *          and every child of `<html>` matched the expected layout.
 */
function detectQuirks(): boolean {
	for (const child of handler.root.childNodes) {
		// doctype/directives, comments and whitespace/text never execute anything
		if (
			child.type === ElementType.Directive ||
			child.type === ElementType.Comment ||
			child.type === ElementType.Text
		) {
			continue;
		}

		if (child.type === ElementType.Tag && child.name === "html") {
			htmlRoot = child as Element;
		} else {
			// there's a child of the root that isn't an html element or a doctype/comment/text
			return true;
		}
	}

	if (!htmlRoot) return true; // no html tag at the top level of the document

	for (const child of htmlRoot.childNodes) {
		if (
			child.type === ElementType.Directive ||
			child.type === ElementType.Comment ||
			child.type === ElementType.Text
		) {
			continue;
		}

		if (child.type === ElementType.Tag && child.name === "head") {
			if (bodyElement) {
				// head comes after body
				return true;
			}
			headElement = child as Element;
		} else if (child.type === ElementType.Tag && child.name === "body") {
			bodyElement = child as Element;
		} else {
			// there's a child of html that isn't head or body
			// fine if head already exists, bad if it doesn't
			if (!headElement) {
				return true;
			}
		}
	}

	// Only report a well-formed document once *every* child of <html> has been
	// inspected. Returning from inside the loop would stop after the first
	// element, making the head-after-body check above unreachable and yielding
	// `undefined` when <html> has no element children.
	return false;
}
80+
81+
let isQuirky = detectQuirks();
4782

83+
if (fromTop) {
4884
const script = (src: string) => new Element("script", { src });
49-
head.children.unshift(
50-
...iface.getInjectScripts(meta, handler, config, cookieJar, script)
85+
const injectScripts = iface.getInjectScripts(
86+
meta,
87+
handler,
88+
config,
89+
cookieJar,
90+
script
5191
);
92+
93+
if (isQuirky) {
94+
dbg.warn(
95+
`detected quirky document structure parsing @ ${meta.origin.href}!`
96+
);
97+
// there's weird stuff going on with the document that could result in page scripts being loaded before our inject scripts
98+
// so inject them at position 0
99+
handler.root.children.unshift(...injectScripts);
100+
} else {
101+
if (!headElement) {
102+
headElement = new Element("head", {}, []);
103+
htmlRoot.children.unshift(headElement);
104+
}
105+
106+
headElement.children.unshift(...injectScripts);
107+
}
52108
}
53109

54110
if (postRewrite) postRewrite(handler);

0 commit comments

Comments
 (0)