/**
* @module whatwg-misc
* @desc Helper module - Helper functions for the main module {@link module:whatwg-xhr whatwg-xhr}.
* @version 1.0.0
* @author Essam A. El-Sherif
*/
/* Import nodeJS core modules */
import assert from 'node:assert/strict';
import { atob } from 'node:buffer';
/*
 * HTTP token code point
 * url: https://mimesniff.spec.whatwg.org/#http-token-code-point
 *
 * @const HTTP_TOKEN_CODEPOINTS
 * @desc Matches a non-empty string made up solely of HTTP token code points, as defined by {@link https://mimesniff.spec.whatwg.org/#http-token-code-point WHATWG MIME Sniffing Living Standard}:
 *       ! # $ % & ' * + - . ^ _ ` | ~ and ASCII alphanumerics.
 *
 * The hyphen is escaped so it is a literal rather than a range operator
 * (an unescaped `+-.` would also admit `,`), and the letters are listed as
 * `A-Za-z` because `A-z` would also admit `[`, `\` and `]`.
 */
const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+\-.^_`|~A-Za-z0-9]+$/;
/*
 * HTTP whitespace
 * url: https://fetch.spec.whatwg.org/#http-whitespace
 *
 * @const HTTP_WHITESPACE_REGEX
 * @desc Matches U+000A LF, U+000D CR, U+0009 TAB or U+0020 SPACE.
 *
 * The pattern is unanchored, so `test()` succeeds when the tested string
 * contains any one of these code points; it is meant to be applied to a
 * single character at a time.
 * Note: U+000C FF is not matched, so this is not the MIME Sniffing
 * "whitespace byte" set (https://mimesniff.spec.whatwg.org/#whitespace-byte).
 */
const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/;
/*
 * HTTP quoted-string token code point
 * url: https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
 *
 * @const HTTP_QUOTED_STRING_TOKENS
 * @desc Intended to match strings made up solely of HTTP quoted-string token code points
 *       (U+0009 TAB, U+0020 to U+007E, U+0080 to U+00FF), as defined by {@link https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point WHATWG MIME Sniffing Living Standard}.
 *
 * NOTE(review): `\x{hhhh}` is not a code point escape in JavaScript regular
 * expressions, and a `-` outside a character class is a literal hyphen, so
 * apart from the TAB alternative this pattern does not express the ranges
 * listed above. Any correction must be coordinated with every place that
 * tests against this pattern, because their results will change.
 */
const HTTP_QUOTED_STRING_TOKENS = /^(\u0009|\x{0020}-\x{007E}|\x{0080}-\x{00FF})+$/;
/**
 * data: URL processor
 * url: https://fetch.spec.whatwg.org/#data-url-processor
 *
 * Splits a `data:` URL at its first comma into a MIME type part and a body
 * part, percent-decodes the body, additionally base64-decodes it when the
 * MIME type part ends with ";base64" (optionally with spaces before
 * "base64"), and parses the MIME type. When the MIME type cannot be parsed
 * it falls back to text/plain;charset=US-ASCII.
 *
 * @func dataURLProcessor
 * @static
 * @param {URL} dataURL - URL object whose protocol is "data:".
 * @return {object|string} `{ mimeType, body }` where mimeType is a parsed MIME type record and body is a Uint8Array, or the string 'failure'.
 * @desc A dataURLProcessor function, as defined by {@link https://fetch.spec.whatwg.org/#data-url-processor WHATWG Fetch Living Standard}.
 */
export function dataURLProcessor(dataURL){
  // Step 1: only data: URLs are accepted.
  assert(dataURL.protocol === 'data:');

  // Steps 2-3: serialize without the fragment and drop the "data:" prefix.
  const input = urlSerializer(dataURL, true).slice(5);

  // Steps 4-5: everything before the first comma is the raw MIME type.
  const cursor = { position: 0 };
  const rawMimeType = collectASequenceOfCodePointsFast(',', input, cursor);

  // Step 6: strip surrounding spaces. The untrimmed text is kept because its
  // length is what locates the comma when the body is sliced in step 9.
  let mimeType = rawMimeType.replace(/^(\u0020)+|(\u0020)+$/g, '');

  // Step 7: no comma was found.
  if(cursor.position >= input.length){
    return 'failure';
  }

  // Step 8: move past the comma.
  cursor.position += 1;

  // Steps 9-10: the rest of the input, percent-decoded to bytes.
  const encodedBody = input.slice(rawMimeType.length + 1);
  let body = stringPercentDecode(encodedBody);

  // Step 11: optional base64 payload.
  const isBase64 = /;(\u0020){0,}base64$/i.test(mimeType);
  if(isBase64){
    // Steps 11.1-11.3: bytes -> string -> forgiving-base64 decoded bytes.
    body = forgivingBase64(isomorphicDecode(body));
    if(body === 'failure'){
      return 'failure';
    }
    // Steps 11.4-11.6: drop "base64", the spaces before it, then the ";".
    mimeType = mimeType
      .slice(0, -6)
      .replace(/(\u0020)+$/, '')
      .slice(0, -1);
  }

  // Step 12: a bare parameter list implies text/plain.
  if(mimeType.startsWith(';')){
    mimeType = `text/plain${mimeType}`;
  }

  // Steps 13-14: parse, falling back to the default MIME type on failure.
  const parsed = parseMIMEType(mimeType);
  const mimeTypeRecord = parsed === 'failure'
    ? parseMIMEType('text/plain;charset=US-ASCII')
    : parsed;

  // Step 15: the data: URL struct.
  return { mimeType: mimeTypeRecord, body };
}
/*
 * URL serializer
 * url: https://url.spec.whatwg.org/#concept-url-serializer
 *
 * Returns the serialization of a URL, optionally without its fragment.
 *
 * @func urlSerializer
 * @param {URL} url - URL object; only its `href` is read.
 * @param {boolean} excludeFragment - (Optional) Boolean parameter, defaulting to false, to exclude fragment.
 * @return {string} `url.href`, cut just before the first "#" when excludeFragment is true.
 * @desc A URL serializer function, as defined by {@link https://url.spec.whatwg.org/#concept-url-serializer WHATWG URL Living Standard}.
 */
function urlSerializer(url, excludeFragment = false){
  const { href } = url;
  if(!excludeFragment){
    return href;
  }
  // The fragment starts at the FIRST "#": a fragment may itself contain "#"
  // (e.g. "data:,a#b#c"), so searching from the end would leave "#b" behind.
  const fragmentStart = href.indexOf('#');
  return fragmentStart === -1 ? href : href.slice(0, fragmentStart);
}
/*
 * collect a sequence of code points
 * url: https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
 *
 * Starting at position.position, gathers consecutive characters of input for
 * which condition returns a truthy value. position.position is advanced past
 * every gathered character and is left on the first rejected character, or
 * at input.length when the input runs out.
 *
 * @func collectASequenceOfCodePoints
 * @param {function} condition - (string) => boolean, called with one character at a time.
 * @param {string} input
 * @param {{ position: number }} position - Mutated in place.
 * @return {string} The gathered characters (possibly the empty string).
 * @desc A function To collect a sequence of code points meeting a condition from a string input, as defined by {@link https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points WHATWG Infra Living Standard}.
 */
function collectASequenceOfCodePoints(condition, input, position){
  let collected = '';
  for(
    let index = position.position;
    index < input.length && condition(input[index]);
    index++
  ){
    collected += input[index];
    // Keep the shared position in step with the scan.
    position.position = index + 1;
  }
  return collected;
}
/*
 * A faster collectASequenceOfCodePoints for the common case of collecting
 * everything up to (not including) the next occurrence of one delimiter.
 *
 * position.position is moved onto the delimiter, or to input.length when the
 * delimiter does not occur at or after the starting position.
 *
 * @param {string} char - Delimiter to stop at.
 * @param {string} input
 * @param {{ position: number }} position - Mutated in place.
 * @return {string} The text between the starting position and the delimiter (or the end of input).
 */
function collectASequenceOfCodePointsFast (char, input, position){
  const from = position.position;
  const found = input.indexOf(char, from);
  const until = found === -1 ? input.length : found;
  position.position = until;
  return input.slice(from, until);
}
/*
 * string percent-decode
 * url: https://url.spec.whatwg.org/#string-percent-decode
 *
 * UTF-8 encodes the string and percent-decodes the resulting bytes.
 *
 * @param {string} input
 * @return {Uint8Array} The percent-decoded bytes.
 */
function stringPercentDecode (input) {
  // url.spec.1-2: UTF-8 encode, then percent-decode the byte sequence.
  const utf8Bytes = new TextEncoder().encode(input);
  return percentDecode(utf8Bytes);
}
/*
 * percent-decode
 * url: https://url.spec.whatwg.org/#percent-decode
 *
 * Replaces every "%" that is followed by two ASCII hex digits with the single
 * byte those digits denote. Every other byte, including a "%" that is not
 * followed by two hex digits, is copied through unchanged.
 *
 * @param {Uint8Array} input
 * @return {Uint8Array} A new array holding the decoded bytes.
 */
function percentDecode(input){
  const decoded = [];
  let index = 0;
  while(index < input.length){
    const current = input[index];
    if(current === 0x25){
      // Look at the two bytes after "%"; bytes past the end do not form a
      // hex pair, so a trailing "%" is kept literally.
      const hexPair = String.fromCharCode(input[index + 1], input[index + 2]);
      if(/^[0-9A-Fa-f]{2}$/i.test(hexPair)){
        decoded.push(Number.parseInt(hexPair, 16));
        // Skip the two hex digits that were just consumed.
        index += 2;
      }
      else{
        decoded.push(0x25);
      }
    }
    else{
      decoded.push(current);
    }
    index++;
  }
  return Uint8Array.from(decoded);
}
/**
 * parse a MIME type
 * url: https://mimesniff.spec.whatwg.org/#parse-a-mime-type
 *
 * On success the returned record has:
 *   - type:       the type, lowercased
 *   - subtype:    the subtype, lowercased
 *   - parameters: a Map of lowercased parameter name to parameter value,
 *                 keeping only the first occurrence of each name
 *   - essence:    "type/subtype", lowercased
 *
 * @func parseMIMEType
 * @static
 * @param {string} input - String input.
 * @return {object|string} MIME type record, or the string 'failure' when the type or subtype is missing or is not an HTTP token.
 * @desc Parse a MIME type, given a string input, as defined by {@link https://mimesniff.spec.whatwg.org/#parse-a-mime-type WHATWG MIME Sniffing Living Standard}.
 */
export function parseMIMEType(input){
  // HTTP whitespace is exactly TAB, LF, CR and SPACE. String.prototype.trim()
  // and trimEnd() strip additional characters (form feed, NBSP, ...), so
  // explicit patterns are used instead.
  const surroundingHTTPWhitespace = /^[\t\n\r ]+|[\t\n\r ]+$/g;
  const trailingHTTPWhitespace = /[\t\n\r ]+$/;
  // HTTP quoted-string token code points: TAB, U+0020-U+007E, U+0080-U+00FF.
  // `*` rather than `+`: an empty value (e.g. name="") trivially "solely
  // contains" such code points and must be accepted.
  const quotedStringTokens = /^[\t\u0020-\u007E\u0080-\u00FF]*$/;

  // mimesniff.spec.1. Remove any leading and trailing HTTP whitespace
  //                   from input.
  input = input.replace(surroundingHTTPWhitespace, '');

  // mimesniff.spec.2. Let position be a position variable for input,
  //                   initially pointing at the start of input.
  const position = { position: 0 };

  // mimesniff.spec.3. Let type be the result of collecting a sequence
  //                   of code points that are not U+002F (/) from
  //                   input, given position.
  const type = collectASequenceOfCodePointsFast('/', input, position);

  // mimesniff.spec.4. If type is the empty string or does not solely
  //                   contain HTTP token code points, then return failure.
  if(type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type)){
    return 'failure';
  }

  // mimesniff.spec.5. If position is past the end of input, then return
  //                   failure. The collector parks position AT input.length
  //                   when no "/" exists, hence `>=` and not `>`.
  if(position.position >= input.length){
    return 'failure';
  }

  // mimesniff.spec.6. Advance position by 1. (This skips past U+002F (/).)
  position.position++;

  // mimesniff.spec.7. Let subtype be the result of collecting a sequence of
  //                   code points that are not U+003B (;) from input, given
  //                   position.
  // mimesniff.spec.8. Remove any trailing HTTP whitespace from subtype.
  const subtype = collectASequenceOfCodePointsFast(';', input, position)
    .replace(trailingHTTPWhitespace, '');

  // mimesniff.spec.9. If subtype is the empty string or does not solely
  //                   contain HTTP token code points, then return failure.
  if(subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype)){
    return 'failure';
  }

  // mimesniff.spec.10. Let mimeType be a new MIME type record whose type
  //                    is type, in ASCII lowercase, and subtype is subtype,
  //                    in ASCII lowercase. (Both are ASCII-only at this
  //                    point, so toLowerCase() is an ASCII lowercase.)
  const typeLowercase = type.toLowerCase();
  const subtypeLowercase = subtype.toLowerCase();
  const mimeType = {
    type: typeLowercase,
    subtype: subtypeLowercase,
    parameters: new Map(),
    // The essence is built from the lowercased parts so it always agrees
    // with `type` and `subtype`.
    essence: `${typeLowercase}/${subtypeLowercase}`
  };

  // mimesniff.spec.11. While position is not past the end of input:
  while(position.position < input.length){
    // mimesniff.spec.11.1. Advance position by 1. (This skips past U+003B (;).)
    position.position++;

    // mimesniff.spec.11.2. Collect a sequence of code points that are HTTP
    //                      whitespace from input given position.
    collectASequenceOfCodePoints(
      (char) => HTTP_WHITESPACE_REGEX.test(char),
      input,
      position
    );

    // mimesniff.spec.11.3. Let parameterName be the result of collecting a
    //                      sequence of code points that are not U+003B (;)
    //                      or U+003D (=) from input, given position.
    // mimesniff.spec.11.4. Set parameterName to parameterName,
    //                      in ASCII lowercase.
    const parameterName = collectASequenceOfCodePoints(
      (char) => char !== ';' && char !== '=',
      input,
      position
    ).toLowerCase();

    // mimesniff.spec.11.5. If position is not past the end of input, then:
    if(position.position < input.length){
      // mimesniff.spec.11.5.1. If the code point at position within input is
      //                        U+003B (;), then continue.
      if(input[position.position] === ';'){
        continue;
      }
      // mimesniff.spec.11.5.2. Advance position by 1. (This skips past U+003D (=).)
      position.position++;
    }

    // mimesniff.spec.11.6. If position is past the end of input, then break.
    if(position.position >= input.length){
      break;
    }

    // mimesniff.spec.11.7. Let parameterValue be null.
    let parameterValue = null;

    // mimesniff.spec.11.8. If the code point at position within input is
    //                      U+0022 ("), then:
    if(input[position.position] === '"'){
      // mimesniff.spec.11.8.1. Set parameterValue to the result of collecting
      //                        an HTTP quoted string from input, given position
      //                        and the extract-value flag.
      parameterValue = collectAnHTTPQuotedString(input, position, true);

      // mimesniff.spec.11.8.2. Collect a sequence of code points that are not
      //                        U+003B (;) from input, given position.
      collectASequenceOfCodePointsFast(';', input, position);
    }
    // mimesniff.spec.11.9. Otherwise:
    else{
      // mimesniff.spec.11.9.1. Set parameterValue to the result of collecting
      //                        a sequence of code points that are not U+003B (;)
      //                        from input, given position.
      // mimesniff.spec.11.9.2. Remove any trailing HTTP whitespace from
      //                        parameterValue (leading whitespace is kept).
      parameterValue = collectASequenceOfCodePointsFast(';', input, position)
        .replace(trailingHTTPWhitespace, '');

      // mimesniff.spec.11.9.3. If parameterValue is the empty string, then continue.
      if(parameterValue.length === 0){
        continue;
      }
    }

    // mimesniff.spec.11.10. If all of the following are true
    //   - parameterName is not the empty string
    //   - parameterName solely contains HTTP token code points
    //   - parameterValue solely contains HTTP quoted-string token code points
    //   - mimeType's parameters[parameterName] does not exist
    // then set mimeType's parameters[parameterName] to parameterValue.
    if(
      parameterName.length !== 0 &&
      HTTP_TOKEN_CODEPOINTS.test(parameterName) &&
      quotedStringTokens.test(parameterValue) &&
      !mimeType.parameters.has(parameterName)
    ){
      mimeType.parameters.set(parameterName, parameterValue);
    }
  }

  // mimesniff.spec.12. Return mimeType.
  return mimeType;
}
/*
 * forgiving-base64 decode
 * url: https://infra.spec.whatwg.org/#forgiving-base64-decode
 *
 * Strips ASCII whitespace, drops up to two trailing "=" when the remaining
 * length is a multiple of four, validates the alphabet and length, and then
 * decodes with atob().
 *
 * @param {string} data
 * @return {Uint8Array|string} The decoded bytes, or the string 'failure'.
 */
function forgivingBase64 (data){
  // infra.spec.1. Remove all ASCII whitespace.
  let cleaned = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '');

  // infra.spec.2. Padding is only removed from correctly sized input.
  if(cleaned.length % 4 === 0){
    cleaned = cleaned.replace(/=?=$/, '');
  }

  // infra.spec.3. A single leftover character cannot encode a byte.
  // infra.spec.4. Anything outside "+", "/" and ASCII alphanumerics
  //               (including a stray "=") is invalid.
  const hasBadLength = cleaned.length % 4 === 1;
  if(hasBadLength || /[^+/0-9A-Za-z]/.test(cleaned)){
    return 'failure';
  }

  // atob() yields one character per decoded byte; copy each character's
  // code into the result.
  return Uint8Array.from(atob(cleaned), (char) => char.charCodeAt(0));
}
/*
 * collect an HTTP quoted string
 * url: https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
 *
 * Reads a double-quoted string that starts at position.position. A backslash
 * escapes the character that follows it; a lone backslash at the very end of
 * input is kept literally. Reading stops after the closing quote or at the
 * end of input, and position.position is left there.
 *
 * @param {string} input
 * @param {{ position: number }} position - Must point at a U+0022 ("); mutated in place.
 * @param {boolean?} extractValue - When truthy, return the unescaped content; otherwise return the raw text that was consumed, quotes included.
 * @return {string}
 */
function collectAnHTTPQuotedString (input, position, extractValue) {
  // fetch.spec.1. Remember where the quoted string begins.
  const positionStart = position.position;

  // fetch.spec.3-4. The opening quote is required; step over it.
  assert(input[position.position] === '"');
  position.position++;

  // fetch.spec.2. Unescaped content gathered so far.
  let value = '';

  // fetch.spec.5.
  for(;;){
    // fetch.spec.5.1. Take everything up to the next quote or backslash.
    value += collectASequenceOfCodePoints(
      (char) => char !== '"' && char !== '\\',
      input,
      position
    );

    // fetch.spec.5.2. Unterminated string: stop at the end of input.
    if(position.position >= input.length){
      break;
    }

    // fetch.spec.5.3-5.4. Consume the quote or backslash.
    const quoteOrBackslash = input[position.position];
    position.position++;

    // fetch.spec.5.6. A quote closes the string.
    if(quoteOrBackslash !== '\\'){
      assert(quoteOrBackslash === '"');
      break;
    }

    // fetch.spec.5.5.1. A trailing backslash has nothing to escape.
    if(position.position >= input.length){
      value += '\\';
      break;
    }

    // fetch.spec.5.5.2-5.5.3. The escaped character is taken verbatim.
    value += input[position.position];
    position.position++;
  }

  // fetch.spec.6-7.
  return extractValue
    ? value
    : input.slice(positionStart, position.position);
}
/**
 * serialize a MIME type
 * url: https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
 *
 * Produces "type/subtype" followed by ";name=value" for each parameter, in
 * the iteration order of the parameters map. A value that isValidHTTPToken()
 * rejects is wrapped in double quotes, with every backslash and double quote
 * inside it preceded by a backslash.
 *
 * @func serializeAMimeType
 * @static
 * @param {object} mimeType - MIME type record (must not be the string 'failure').
 * @return {string} Serialized MIME type record.
 * @desc Serialize a MIME type, as defined by {@link https://mimesniff.spec.whatwg.org/#serialize-a-mime-type WHATWG MIME Sniffing Living Standard}.
 */
export function serializeAMimeType(mimeType){
  assert(mimeType !== 'failure');

  const { type, subtype, parameters } = mimeType;

  // mimesniff.spec.1. Start with "type/subtype".
  let serialization = `${type}/${subtype}`;

  // mimesniff.spec.2. Append each parameter as ";name=value".
  for(const [name, rawValue] of parameters.entries()){
    // mimesniff.spec.2.4. Values that are not plain tokens are quoted and
    //                     have their backslashes and quotes escaped.
    const value = isValidHTTPToken(rawValue)
      ? rawValue
      : `"${rawValue.replace(/(\\|")/g, '\\$1')}"`;

    // mimesniff.spec.2.1-2.3, 2.5.
    serialization = serialization + ';' + name + '=' + value;
  }

  // mimesniff.spec.3.
  return serialization;
}
// Inputs at or above this length are decoded one byte at a time instead of
// being passed to String.fromCharCode as a single argument list.
const MAXIMUM_ARGUMENT_LENGTH = 65535;
/*
 * isomorphic decode
 * url: https://infra.spec.whatwg.org/#isomorphic-decode
 *
 * Returns a string with one code unit per input byte, each code unit having
 * the same numeric value as the corresponding byte, in the same order.
 *
 * @param {number[]|Uint8Array} input
 * @return {string}
 */
function isomorphicDecode(input){
  if(input.length >= MAXIMUM_ARGUMENT_LENGTH){
    let decoded = '';
    for(const byte of input){
      decoded += String.fromCharCode(byte);
    }
    return decoded;
  }
  return String.fromCharCode.apply(null, input);
}
// RFC 7230, Section 3.2.6.
// https://github.com/chromium/chromium/blob/d7da0240cae77824d1eda25745c4022757499131/third_party/blink/renderer/platform/network/http_parsers.cc#L321
/*
 * Tells whether a value is a non-empty string in which every UTF-16 code unit
 * is at most 0x7f and is accepted by isTokenChar().
 *
 * @param {*} characters - Candidate token; anything that is not a non-empty string yields false.
 * @return {boolean}
 */
function isValidHTTPToken(characters){
  if(typeof characters !== 'string' || characters.length === 0){
    return false;
  }
  for(let index = 0; index < characters.length; index++){
    const code = characters.charCodeAt(index);
    if(code > 0x7f || !isTokenChar(code)){
      return false;
    }
  }
  return true;
}
/*
 * Tells whether a character may appear in an HTTP token (RFC 7230,
 * Section 3.2.6): a visible ASCII character that is not one of the
 * delimiters ( ) < > @ , ; : \ " / [ ] ? = { }.
 *
 * The delimiter check compares characters with characters. Comparing a
 * numeric char code to a string literal with `===` is always false and would
 * let every delimiter through.
 *
 * @param {number|string} c - A UTF-16 char code, or a string whose first character is examined.
 * @return {boolean} true when the character is a token character.
 */
function isTokenChar(c){
  const code = typeof c === 'string' ? c.charCodeAt(0) : c;

  // Controls, SPACE, DEL and everything beyond ASCII are never token chars.
  if(code >= 0x7f || code <= 0x20){
    return false;
  }

  return !'()<>@,;:\\"/[]?={}'.includes(String.fromCharCode(code));
}
// Source: lib/helper/whatwg-misc.js
// A Node.js implementation of the WHATWG XMLHttpRequest Living Standard for non-browser environments.