Reputation: 18513
I have a vanity URL pointing to a GitBook. GitBook doesn't support the insertion of arbitrary javascript snippets. At the moment GitBook has 4 "integrations" only.
I could route through my own VM server to accomplish this, but I have CloudFlare and I want to try out workers. (Javascript running at the CDN edge).
The CloudFlare worker environment makes header injection very easy, but there is no obvious way to inject a script into the HTML body of the response.
Upvotes: 3
Views: 3991
Reputation: 18513
It's important to process with a TransformStream so that processing is async and doesn't require memory buffering (for scalability and to minimise GC) - there's only a 5ms CPU time budget.
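Overview:
- The simplest approach is to edit the strings `forHeadStart`, `forHeadEnd`, and `forBodyEnd`.
- Alternatively, inject directly at a tag using `headInjection` and/or `bodyInjection`. This requires uncommenting the related code, including the calls in `injectScripts`, and setting the strings for `tagBytes` that will be encoded.

Code: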
// Register the Worker's fetch handler: every incoming request is answered by handleRequest.
addEventListener('fetch', function onFetch(event) {
    // Ask the runtime to fall back to the origin if this handler throws.
    event.passThroughOnException();
    const pendingResponse = handleRequest(event.request);
    event.respondWith(pendingResponse);
});
/**
 * Fetch the response for a request and, when it is an HTML document, stream
 * its body through the script injector.
 *
 * Responses that are not HTML, that carry no content-type header, or that have
 * no body are returned untouched.
 *
 * @param {Request} request - The incoming request to forward.
 * @returns {Promise<Response>} The original response, or a new response whose
 *     body is the injected stream and whose status/headers are copied from it.
 */
async function handleRequest(request) {
    const response = await fetch(request);
    // headers.get() yields null when the header is absent; guard before calling string methods.
    const contentType = response.headers.get('content-type');
    const isHtml = contentType !== null && contentType.toLowerCase().startsWith('text/html');
    if (!isHtml || !response.body) {
        return response; //Only parse html bodies that actually exist
    }
    const { readable, writable } = new TransformStream();
    // Deliberately not awaited: the injector keeps writing into `writable`
    // while the runtime streams `readable` to the client.
    injectScripts(response.body, writable);
    return new Response(readable, response);
}
// Encoder used only while the injection jobs below are built; it is released afterwards.
let encoder = new TextEncoder();

/**
 * Injection job placed right after the opening <head> tag.
 *
 * It emits `forHeadStart` verbatim, followed by a small helper script that
 * adds `forHeadEnd` to document.head immediately and `forBodyEnd` to
 * document.body once the page has loaded. The deferred snippets are carried as
 * base64 so their own markup (e.g. a closing script tag) cannot terminate the
 * helper script early. NOTE: btoa() only accepts Latin-1 input.
 *
 * Shape: { forInjection: Uint8Array, tagBytes: Uint8Array, insertAfterTag: boolean }
 */
const deferredInjection = (() => {
    const forHeadStart = `<script>var test = 1; //Start of head section</script>`;
    const forHeadEnd = `<script>var test = 2; //End of head section</script>`;
    const forBodyEnd = `<script>var test = 3; //End of body section</script><button>click</button>`;
    // createContextualFragment is used instead of innerHTML because <script>
    // elements created through innerHTML are never executed.
    // The load listener is registered with addEventListener so it neither
    // replaces nor is replaced by a window.onload handler set by the page.
    const helper = `
${forHeadStart}
<script>
function appendHtmlTo(element, htmlContent) {
    element.appendChild(document.createRange().createContextualFragment(htmlContent));
}
let forHeadEnd = "${ btoa(forHeadEnd) }";
let forBodyEnd = "${ btoa(forBodyEnd) }";
if (forHeadEnd.length > 0) appendHtmlTo(document.head, atob(forHeadEnd));
if (forBodyEnd.length > 0) window.addEventListener('load', function() {
    appendHtmlTo(document.body, atob(forBodyEnd));
});
</script>
`;
    return {
        forInjection: encoder.encode(helper),
        tagBytes: encoder.encode("<head>"),
        insertAfterTag: true,
    };
})();

// Alternative jobs (see the matching commented calls in injectScripts):
// let headInjection = {
//     forInjection: encoder.encode("<script>var test = 1;</script>"),
//     tagBytes: encoder.encode("</head>"), //case sensitive
//     insertAfterTag: false
// };
// let bodyInjection = {
//     forInjection: encoder.encode("<script>var test = 1;</script>"),
//     tagBytes: encoder.encode("</body>"), //case sensitive
//     insertAfterTag: false
// }

// All byte arrays are built; drop the encoder.
encoder = null;
/**
 * Drive one injection pass over a response body.
 *
 * Locks both streams, runs the deferred injection job against the incoming
 * bytes, then forwards whatever is left.
 *
 * @param {ReadableStream} readable - Source body stream.
 * @param {WritableStream} writable - Destination stream for the rewritten body.
 * @returns {Promise<void>}
 */
async function injectScripts(readable, writable) {
    const reader = readable.getReader();
    const writer = writable.getWriter();
    // One state object is shared by every stage for the duration of the request.
    const processingState = {
        readStream: readable,
        writeStream: writable,
        reader,
        writer,
        leftOvers: null, // bytes that followed a matched tag within the same chunk
        inputDone: false,
        result: { charactersFound: 0, foundIndex: -1, afterHeadTag: -1 }, // reused search result
    };

    await parseForInjection(processingState, deferredInjection);
    //await parseForInjection(processingState, headInjection);
    //await parseForInjection(processingState, bodyInjection);
    await forwardTheRest(processingState);
}
/**
 * Scan a chunk for the first complete occurrence of `tagBytes`, or for a
 * prefix of it that runs up to the very end of the chunk.
 *
 * The outcome is written into the caller-supplied `result` (reused to avoid
 * allocating per chunk):
 *   - no match and no trailing prefix: charactersFound 0, foundIndex -1, afterHeadTag -1
 *   - complete match: charactersFound === tagBytes.length
 *   - prefix at the end of the chunk: 0 < charactersFound < tagBytes.length
 * For the last two, foundIndex is where the match starts and afterHeadTag is
 * the index just past the matched bytes.
 *
 * @param {Uint8Array} chunk - Bytes to scan.
 * @param {Uint8Array} tagBytes - Byte sequence to look for.
 * @param {{charactersFound: number, foundIndex: number, afterHeadTag: number}} result - Overwritten.
 * @returns {undefined}
 */
function searchByteArrayChunkForClosingTag(chunk, tagBytes, result)
{
    const leadByte = tagBytes[0];
    const tagLength = tagBytes.length;

    let candidate = chunk.indexOf(leadByte, 0);
    while (candidate !== -1) {
        // Extend the match from this candidate as far as the bytes agree.
        let matched = 1;
        let cursor = candidate + 1;
        while (matched < tagLength && cursor < chunk.length && chunk[cursor] === tagBytes[matched]) {
            matched += 1;
            cursor += 1;
        }

        // Stop on a full match, or on a prefix that reached the chunk boundary.
        if (matched === tagLength || cursor === chunk.length) {
            result.charactersFound = matched;
            result.foundIndex = candidate;
            result.afterHeadTag = cursor;
            return;
        }

        // Mismatch before the boundary: try the next occurrence of the lead byte.
        candidate = chunk.indexOf(leadByte, candidate + 1);
    }

    result.charactersFound = 0;
    result.foundIndex = -1;
    result.afterHeadTag = -1;
}
/**
 * Resume a match that started at the end of the previous chunk.
 *
 * Starting from the number of tag bytes already matched
 * (`lastSplitResult.charactersFound`), compare the remaining tag bytes with
 * the start of `chunk` and record progress in `result`:
 *   - charactersFound: total tag bytes matched so far, or 0 on mismatch
 *   - foundIndex: negative carried-over count (marks a boundary-spanning match)
 *   - afterHeadTag: index in `chunk` just past the matched bytes, or -1 on mismatch
 *
 * @param {Uint8Array} chunk - The chunk following the split.
 * @param {Uint8Array} tagBytes - Byte sequence being matched.
 * @param {{charactersFound: number}} lastSplitResult - Progress from the previous chunk.
 * @param {{charactersFound: number, foundIndex: number, afterHeadTag: number}} result - Overwritten.
 * @returns {object|undefined} `result` when the chunk ends before the tag does, otherwise undefined.
 */
function continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result)
{
    // Read the carried count first: lastSplitResult may be the same object as result.
    const carriedOver = lastSplitResult.charactersFound;
    result.charactersFound = carriedOver;
    result.foundIndex = -1 * carriedOver;
    result.afterHeadTag = 0;

    let tagPosition = carriedOver;
    let chunkPosition = 0;
    while (tagPosition < tagBytes.length) {
        if (chunkPosition === chunk.length) {
            return result; // chunk is shorter than the remaining tag bytes
        }
        if (chunk[chunkPosition] !== tagBytes[tagPosition]) {
            result.charactersFound = 0;
            result.afterHeadTag = -1;
            return;
        }
        chunkPosition += 1;
        tagPosition += 1;
        result.charactersFound += 1;
        result.afterHeadTag = chunkPosition;
    }
}
/**
 * Search `chunk` for `tagBytes`, first trying to finish a match that was split
 * across the previous chunk boundary.
 *
 * When `lastSplitResult` is given, the continuation is attempted; if it
 * completes the tag, that outcome is kept. In every other case (including a
 * continuation that runs out of chunk before the tag is complete) a fresh
 * search of the chunk replaces whatever the continuation wrote to `result`.
 *
 * @param {Uint8Array} chunk - Bytes to scan.
 * @param {Uint8Array} tagBytes - Byte sequence to look for.
 * @param {?{charactersFound: number}} lastSplitResult - Progress from the previous chunk, or null/undefined.
 * @param {{charactersFound: number, foundIndex: number, afterHeadTag: number}} result - Overwritten.
 * @returns {object} Always `result`, so every path returns the same thing.
 */
function continueOrNewSearch(chunk, tagBytes, lastSplitResult, result)
{
    if (lastSplitResult != null)
    {
        continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result);
        if (result.charactersFound === tagBytes.length)
        {
            return result; //Tag completed across the boundary
        }
    }
    searchByteArrayChunkForClosingTag(chunk, tagBytes, result); //Keep searching onward
    return result;
}
/**
 * Copy bytes from the reader to the writer until `injectionJob.tagBytes` is
 * seen, then emit the tag together with `injectionJob.forInjection`.
 *
 * The tag may be split across chunk boundaries: bytes that look like the start
 * of the tag at the end of a chunk are withheld (only their count is kept) and
 * are either emitted as part of the completed tag or flushed unchanged once
 * the next chunk shows they were not the tag.
 *
 * On a match, the bytes following the tag in the current chunk are stored in
 * `processingState.leftOvers` for the next stage. If the input ends without a
 * match, any withheld bytes are flushed and the writer is closed.
 *
 * @param {object} processingState - Shared state: reader, writer, result, leftOvers, inputDone.
 * @param {?{tagBytes: Uint8Array, forInjection: Uint8Array, insertAfterTag: boolean}} injectionJob
 *     What to look for and what to insert. When `insertAfterTag` is true the
 *     output is tag + injection, otherwise injection + tag.
 * @returns {Promise<true|undefined>} true when the input ended (writer closed),
 *     undefined when the injection was performed or the job was skipped.
 */
async function parseForInjection(processingState, injectionJob)
{
    if (processingState.inputDone) return; //Nothing left to read
    if (!injectionJob) return;
    if (!injectionJob.tagBytes) return;
    if (!injectionJob.forInjection) return;
    if (injectionJob.tagBytes.length === 0) return; //An empty tag can never be matched meaningfully

    const { reader, writer, result } = processingState;
    const { tagBytes, forInjection } = injectionJob;

    // Number of leading tag bytes matched at the end of the previous chunk and
    // not yet written. A plain count is kept (not the shared `result` object,
    // which the next search overwrites).
    let withheldCount = 0;
    processingState.inputDone = false;

    for (;;) {
        let chunk;
        if (processingState.leftOvers)
        {
            chunk = processingState.leftOvers;
            processingState.leftOvers = null;
        }
        else
        {
            const readerResult = await reader.read();
            chunk = readerResult.value;
            processingState.inputDone = readerResult.done;
        }

        if (processingState.inputDone) {
            // The tag never appeared. Withheld bytes were ordinary content: emit them.
            if (withheldCount > 0) {
                await writer.write(tagBytes.slice(0, withheldCount));
            }
            await writer.close();
            return true;
        }

        const lastSplitResult = withheldCount > 0 ? { charactersFound: withheldCount } : null;
        continueOrNewSearch(chunk, tagBytes, lastSplitResult, result);

        const isCompleteMatch = result.charactersFound === tagBytes.length;
        // A negative foundIndex marks a match that began in the previous chunk.
        const completedAcrossBoundary = isCompleteMatch && result.foundIndex < 0;

        if (withheldCount > 0 && !completedAcrossBoundary) {
            // The continuation failed, so the withheld bytes (maybe `<` or `</`)
            // were not the tag. Write them before anything from this chunk.
            await writer.write(tagBytes.slice(0, withheldCount));
        }
        withheldCount = 0;

        if (isCompleteMatch)
        {
            if (result.foundIndex > 0)
            {
                await writer.write(chunk.slice(0, result.foundIndex));
            }
            console.log('injected');
            if (injectionJob.insertAfterTag)
            {
                await writer.write(tagBytes);
                await writer.write(forInjection);
            }
            else
            {
                await writer.write(forInjection);
                await writer.write(tagBytes);
            }
            // Everything after the tag, up to and including the chunk's last byte.
            const remainder = chunk.slice(result.afterHeadTag);
            processingState.leftOvers = remainder.length > 0 ? remainder : null;
            return;
        }

        if (result.charactersFound === 0)
        {
            await writer.write(chunk);
            continue;
        }

        // Partial match at the end of the chunk: write what precedes it and withhold the rest.
        withheldCount = result.charactersFound;
        if (result.foundIndex > 0)
        {
            await writer.write(chunk.slice(0, result.foundIndex));
        }
    }
}
/**
 * Forward everything that has not been written yet: first any bytes left over
 * from the chunk in which a tag was matched, then the remainder of the source
 * stream.
 *
 * The reader and writer locks are released so the streams can be piped
 * directly. pipeTo() is called with its default options, under which the
 * destination is closed when the source closes, so no explicit close is needed.
 * Errors are logged rather than rethrown (best effort).
 *
 * @param {object} processingState - Shared state: reader, writer, readStream, writeStream, leftOvers, inputDone.
 * @returns {Promise<void>}
 */
async function forwardTheRest(processingState)
{
    try
    {
        if (processingState.inputDone) return; //Input already ended, nothing to forward
        const remainder = processingState.leftOvers;
        if (remainder)
        {
            processingState.leftOvers = null;
            await processingState.writer.write(remainder);
        }
        processingState.reader.releaseLock();
        processingState.writer.releaseLock();
        await processingState.readStream.pipeTo(processingState.writeStream);
    }
    catch (e)
    {
        console.log(e);
    }
}
Further explanation of working directly with (UTF-8) bytes:
- We only search for tags such as `</head>`, which is made up of lower-than-128 bytes, so it is very easy to work with.
- You may find `continueOrNewSearch` and the two sub-functions to be an interesting approach to finding multi-byte sequences across a chunk boundary. Up until the boundary we just count how many bytes are found. There's no need to keep those bytes (we know what they are). Then on the next chunk we continue where we left off. We always cut the array buffer around the tag, and make sure we write the tag bytes (using the `tagBytes`).
Upvotes: 7