// XMLHttpRequest: Source: lib/helper/whatwg-misc.js

/**
 * @module  whatwg-misc
 * @desc    Helper module - Helper functions for the main module {@link module:whatwg-xhr whatwg-xhr}.
 * @version 1.0.0
 * @author  Essam A. El-Sherif
 */

/* Import nodeJS core modules */
import assert   from 'node:assert/strict';
import { atob } from 'node:buffer';

/*
 * HTTP token code point
 * url: https://mimesniff.spec.whatwg.org/#http-token-code-point
 *
 * @const HTTP_TOKEN_CODEPOINTS
 * @desc  An HTTP token code point, as defined by {@link https://mimesniff.spec.whatwg.org/#http-token-code-point WHATWG MIME Sniffing Living Standard}.
 */
// The character class is spelled out explicitly: "-" is escaped and the letters
// are split into A-Z / a-z. Written as "+-." and "A-z" they would be ranges that
// also admit "," "[" "\" and "]", none of which are HTTP token code points.
// Anchored with ^…$ and "+", so the empty string never matches.
const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+\-.^_`|~A-Za-z0-9]+$/;

/*
 * A whitespace byte (abbreviated 0xWS)
 * url: https://mimesniff.spec.whatwg.org/#whitespace-byte
 *
 * @const HTTP_WHITESPACE_REGEX
 * @desc  Matches one U+000A LF, U+000D CR, U+0009 TAB or U+0020 SPACE anywhere in the tested string
 *        (the pattern is unanchored and has no flags, so `.test()` keeps no state between calls).
 *        NOTE(review): the set matched here is what the Fetch standard calls "HTTP whitespace"
 *        (https://fetch.spec.whatwg.org/#http-whitespace); the MIME Sniffing "whitespace byte"
 *        linked above additionally lists 0x0C FF, which this pattern does not match.
 */
const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/;

/*
 * HTTP quoted-string token code point
 * url: https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
 *
 * @const HTTP_QUOTED_STRING_TOKENS
 * @desc  HTTP quoted-string token code point, as defined by {@link https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point WHATWG MIME Sniffing Living Standard}.
 */
// NOTE(review): JavaScript regular expressions have no `\x{HHHH}` escape. Without
// the `u` flag, `\x{0020}` is read as the letter "x" followed by the quantifier
// {0020}, so this pattern does NOT match the code-point ranges described above
// (U+0009, U+0020-U+007E, U+0080-U+00FF). The intended character class would be
// [\u0009\u0020-\u007E\u0080-\u00FF]. Any change to this pattern must be made
// together with every place that tests against it, since their results flip.
const HTTP_QUOTED_STRING_TOKENS = /^(\u0009|\x{0020}-\x{007E}|\x{0080}-\x{00FF})+$/;

/**
 * The data: URL processor takes a URL dataURL and then runs these steps:
 * url: https://fetch.spec.whatwg.org/#data-url-processor
 *
 *  1. Assert: dataURL’s scheme is "data".
 *  2. Let input be the result of running the URL serializer on dataURL with exclude fragment set to true.
 *  3. Remove the leading "data:" from input.
 *  4. Let position point at the start of input.
 *  5. Let mimeType be the result of collecting a sequence of code points that are not equal to U+002C (,), given position.
 *  6. Strip leading and trailing ASCII whitespace from mimeType.
 *     Note: This will only remove U+0020 SPACE code points, if any.
 *  7. If position is past the end of input, then return failure.
 *  8. Advance position by 1.
 *  9. Let encodedBody be the remainder of input.
 * 10. Let body be the percent-decoding of encodedBody.
 * 11. If mimeType ends with U+003B (;), followed by zero or more U+0020 SPACE, followed by an ASCII case-insensitive match for "base64", then:
 *     1. Let stringBody be the isomorphic decode of body.
 *     2. Set body to the forgiving-base64 decode of stringBody.
 *     3. If body is failure, then return failure.
 *     4. Remove the last 6 code points from mimeType.
 *     5. Remove trailing U+0020 SPACE code points from mimeType, if any.
 *     6. Remove the last U+003B (;) from mimeType.
 * 12. If mimeType starts with ";", then prepend "text/plain" to mimeType.
 * 13. Let mimeTypeRecord be the result of parsing mimeType.
 * 14. If mimeTypeRecord is failure, then set mimeTypeRecord to text/plain;charset=US-ASCII.
 * 15. Return a new data: URL struct whose MIME type is mimeTypeRecord and body is body.
 *
 * @func   dataURLProcessor
 * @static
 * @param  {URL} dataURL - URL object.
 * @return {object|string} An object of mimeType and body, or 'failure' string.
 * @desc A dataURLProcessor function, as defined by {@link https://fetch.spec.whatwg.org/#data-url-processor WHATWG Fetch Living Standard}.
 */
export function dataURLProcessor(dataURL){

	// Step 1: only URLs with the "data" scheme may be processed.
	assert(dataURL.protocol === 'data:');

	// Steps 2-3: serialize the URL without its fragment, then drop the
	//            five-character "data:" prefix.
	const input = urlSerializer(dataURL, true).slice(5);

	// Steps 4-5: everything in front of the first U+002C (,) is the raw MIME type.
	const cursor = { position: 0 };
	const rawMimeType = collectASequenceOfCodePointsFast(',', input, cursor);

	// Step 6: strip leading and trailing U+0020 SPACE. The unstripped string is
	//         kept because its length marks where the body begins in input.
	let mimeType = rawMimeType.replace(/^(\u0020)+|(\u0020)+$/g, '');

	// Step 7: no comma was found, so there is no body at all.
	if(cursor.position >= input.length){
		return 'failure';
	}

	// Steps 8-9: step over the comma; the remainder of input is the encoded body.
	cursor.position += 1;
	const encodedBody = input.slice(rawMimeType.length + 1);

	// Step 10: percent-decode the body into bytes.
	let body = stringPercentDecode(encodedBody);

	// Step 11: a trailing ";base64" (spaces allowed after ";", case-insensitive)
	//          means the body is base64-encoded.
	const isBase64 = /;(\u0020){0,}base64$/i.test(mimeType);

	if(isBase64){
		// Steps 11.1-11.3: isomorphic-decode the bytes to a string, then
		//                  forgiving-base64 decode it; give up on failure.
		body = forgivingBase64(isomorphicDecode(body));

		if(body === 'failure'){
			return 'failure';
		}

		// Steps 11.4-11.6: drop "base64" (6 code points), any spaces left in
		//                  front of it, and finally the ";" itself.
		mimeType = mimeType
			.slice(0, -6)
			.replace(/(\u0020)+$/, '')
			.slice(0, -1);
	}

	// Step 12: a MIME type consisting only of parameters defaults to text/plain.
	if(mimeType.startsWith(';')){
		mimeType = 'text/plain' + mimeType;
	}

	// Steps 13-14: parse the MIME type; fall back to text/plain;charset=US-ASCII
	//              when it cannot be parsed.
	const parsed = parseMIMEType(mimeType);
	const mimeTypeRecord = parsed === 'failure'
		? parseMIMEType('text/plain;charset=US-ASCII')
		: parsed;

	// Step 15: hand back the data: URL struct.
	return { mimeType: mimeTypeRecord, body };
}

/*
 * URL serializer
 * url: https://url.spec.whatwg.org/#concept-url-serializer
 *
 * Takes a URL url, with an optional boolean exclude fragment (default false),
 * and returns an ASCII string.
 *
 * @func   urlSerializer
 * @param  {URL} url - URL object; only its `href` property is read.
 * @param  {boolean} excludeFragment - (Optional) Boolean parameter, defaulting to false, to exclude fragment.
 * @return {string} The serialized URL, without "#" and everything after it when excludeFragment is true.
 * @desc   A URL serializer function, as defined by {@link https://url.spec.whatwg.org/#concept-url-serializer WHATWG URL Living Standard}.
 */
function urlSerializer(url, excludeFragment = false){

	const { href } = url;

	if(!excludeFragment)
		return href;

	// Cut at the FIRST "#": in a serialized URL that is where the fragment
	// begins, while the fragment itself may contain further "#" characters.
	// Searching from the end would leave part of the fragment in the result.
	const fragmentStart = href.indexOf('#');

	return fragmentStart === -1 ? href : href.slice(0, fragmentStart);
}

/*
 * collect a sequence of code points
 * url: https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
 *
 * @func   collectASequenceOfCodePoints
 * @param  {function} condition - (string) => boolean.
 * @param  {string} input
 * @param  {{ position: number }} position
 * @return {string} Sequence of code points.
 * @desc   A function To collect a sequence of code points meeting a condition from a string input, as defined by {@link https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points WHATWG Infra Living Standard}.
 */
function collectASequenceOfCodePoints(condition, input, position){

	// Remember where collection starts; the accepted run is cut out of input
	// in one piece once the scan stops.
	const begin = position.position;

	// Advance while still inside input and the current code point is accepted
	// by condition. position is updated in place for the caller.
	while(position.position < input.length){
		if(!condition(input[position.position]))
			break;

		position.position += 1;
	}

	// The collected sequence (empty when nothing was accepted).
	return input.slice(begin, position.position);
}

/*
 * Single-character variant of collectASequenceOfCodePoints: collects everything
 * from the current position up to (not including) the next occurrence of char,
 * using String.prototype.indexOf instead of a per-code-point callback.
 *
 *   @param {string} char - The character that ends the collected sequence.
 *   @param {string} input
 *   @param {{ position: number }} position - Updated in place: left on the found
 *          character, or set to input.length when char does not occur.
 *   @return {string} The collected sequence.
 */
function collectASequenceOfCodePointsFast (char, input, position){

	const begin = position.position;
	const found = input.indexOf(char, begin);

	// Not found: consume the rest of input. Found: stop right on the delimiter.
	position.position = found === -1 ? input.length : found;

	return input.slice(begin, position.position);
}

/*
 * To percent-decode a scalar value string input:
 * url: https://url.spec.whatwg.org/#string-percent-decode
 *
 * @param  {string} input
 * @return {Uint8Array} The percent-decoded bytes.
 */
function stringPercentDecode (input) {

	// url.spec.1. UTF-8 encode input into bytes.
	const utf8Bytes = new TextEncoder().encode(input);

	// url.spec.2. Percent-decode those bytes.
	return percentDecode(utf8Bytes);
}

/*
 * percent-decode
 * url: https://url.spec.whatwg.org/#percent-decode
 *
 * Percent-decodes a byte sequence: every "%" that is followed by two ASCII hex
 * digits is replaced by the byte those digits denote; every other byte,
 * including a "%" without two hex digits after it, is copied through unchanged.
 *
 * @param  {Uint8Array} input
 * @return {Uint8Array} A new byte sequence holding the decoded bytes.
 */
function percentDecode(input){

	const decoded = [];
	let index = 0;

	while(index < input.length){
		const current = input[index];

		if(current === 0x25){
			// Look at the two bytes after "%". Bytes missing at the end of input
			// turn into U+0000 here and therefore never pass the hex check.
			const hexPair = String.fromCharCode(input[index + 1], input[index + 2]);

			if(/^[0-9A-Fa-f]{2}$/i.test(hexPair)){
				// Valid escape: emit the encoded byte and skip "%XX" as a whole.
				decoded.push(Number.parseInt(hexPair, 16));
				index += 3;
				continue;
			}
		}

		// Ordinary byte, or a "%" that does not start a valid escape.
		decoded.push(current);
		index += 1;
	}

	return Uint8Array.from(decoded);
}

/**
 * parse a MIME type
 * url: https://mimesniff.spec.whatwg.org/#parse-a-mime-type
 *
 * @func   parseMIMEType
 * @static
 * @param  {string} input - String input.
 * @return {object|string} A MIME type record `{ type, subtype, parameters, essence }`
 *                         (`parameters` is a Map of lowercased name to value), or the string 'failure'.
 * @desc   Parse a MIME type, given a string input, as defined by {@link https://mimesniff.spec.whatwg.org/#parse-a-mime-type WHATWG MIME Sniffing Living Standard}.
 */
export function parseMIMEType(input){

	// mimesniff.spec.1. Remove any leading and trailing HTTP whitespace
	//                   from input. (Only LF, CR, TAB and SPACE - not every
	//                   Unicode whitespace, which String.prototype.trim removes.)
	input = removeHTTPWhitespace(input, true, true);

	// mimesniff.spec.2. Let position be a position variable for input,
	//                   initially pointing at the start of input.
	const position = { position: 0 };

	// mimesniff.spec.3. Let type be the result of collecting a sequence
	//                   of code points that are not U+002F (/) from
	//                   input, given position.
	const type = collectASequenceOfCodePointsFast('/', input, position);

	// mimesniff.spec.4. If type is the empty string or does not solely
	//                   contain HTTP token code points, then return failure.
	if(type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type))
		return 'failure';

	// mimesniff.spec.5. If position is past the end of input,
	//                   then return failure. (No "/" was found.)
	if(position.position >= input.length)
		return 'failure';

	// mimesniff.spec.6. Advance position by 1. (This skips past U+002F (/).)
	position.position++;

	// mimesniff.spec.7. Let subtype be the result of collecting a sequence of
	//                   code points that are not U+003B (;) from input, given
	//                   position.
	let subtype = collectASequenceOfCodePointsFast(';', input, position);

	// mimesniff.spec.8. Remove any trailing HTTP whitespace from subtype.
	subtype = removeHTTPWhitespace(subtype, false, true);

	// mimesniff.spec.9. If subtype is the empty string or does not solely
	//                   contain HTTP token code points, then return failure.
	if(subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype))
		return 'failure';

	// mimesniff.spec.10. Let mimeType be a new MIME type record whose type
	//                    is type, in ASCII lowercase, and subtype is subtype,
	//                    in ASCII lowercase. The essence is built from the
	//                    lowercased parts so that it agrees with type/subtype.
	const typeLowercase    = asciiLowercase(type);
	const subtypeLowercase = asciiLowercase(subtype);

	const mimeType = {
		type:       typeLowercase,
		subtype:    subtypeLowercase,
		parameters: new Map(),
		essence:    `${typeLowercase}/${subtypeLowercase}`
	};

	// mimesniff.spec.11. While position is not past the end of input:
	while(position.position < input.length){

		// mimesniff.spec.11.1. Advance position by 1. (This skips past U+003B (;).)
		position.position++;

		// mimesniff.spec.11.2. Collect a sequence of code points that are HTTP
		//                      whitespace from input given position.
		collectASequenceOfCodePoints(
			(char) => HTTP_WHITESPACE_REGEX.test(char),
			input,
			position
		);

		// mimesniff.spec.11.3. Let parameterName be the result of collecting a
		//                      sequence of code points that are not U+003B (;)
		//                      or U+003D (=) from input, given position.
		let parameterName = collectASequenceOfCodePoints(
			(char) => char !== ';' && char !== '=',
			input,
			position
		);

		// mimesniff.spec.11.4. Set parameterName to parameterName, in ASCII
		//                      lowercase. (ASCII-only on purpose: a full Unicode
		//                      lowercase maps e.g. U+212A KELVIN SIGN to "k".)
		parameterName = asciiLowercase(parameterName);

		// mimesniff.spec.11.5. If position is not past the end of input, then:
		if(position.position < input.length){
			// mimesniff.spec.11.5.1. If the code point at position within input is
			//                        U+003B (;), then continue.
			if(input[position.position] === ';')
				continue;

			// mimesniff.spec.11.5.2. Advance position by 1. (This skips past U+003D (=).)
			position.position++;
		}

		// mimesniff.spec.11.6. If position is past the end of input, then break.
		if(position.position >= input.length)
			break;

		// mimesniff.spec.11.7. Let parameterValue be null.
		let parameterValue = null;

		// mimesniff.spec.11.8. If the code point at position within input is
		//                      U+0022 ("), then:
		if(input[position.position] === '"'){

			// mimesniff.spec.11.8.1. Set parameterValue to the result of collecting
			//                        an HTTP quoted string from input, given position
			//                        and the extract-value flag.
			parameterValue = collectAnHTTPQuotedString(input, position, true);

			// mimesniff.spec.11.8.2. Collect a sequence of code points that are not
			//                        U+003B (;) from input, given position.
			collectASequenceOfCodePointsFast(';', input, position);
		}
		// mimesniff.spec.11.9. Otherwise:
		else{
			// mimesniff.spec.11.9.1. Set parameterValue to the result of collecting
			//                        a sequence of code points that are not U+003B (;)
			//                        from input, given position.
			parameterValue = collectASequenceOfCodePointsFast(';', input, position);

			// mimesniff.spec.11.9.2. Remove any trailing HTTP whitespace from parameterValue.
			//                        Note: it says "trailing" whitespace; leading is fine.
			parameterValue = removeHTTPWhitespace(parameterValue, false, true);

			// mimesniff.spec.11.9.3. If parameterValue is the empty string, then continue.
			if(parameterValue.length === 0)
				continue;
		}

		// mimesniff.spec.11.10. If all of the following are true
		//                       - parameterName is not the empty string
		//                       - parameterName solely contains HTTP token code points
		//                       - parameterValue solely contains HTTP quoted-string token code points
		//                       - mimeType's parameters[parameterName] does not exist
		//                       then set mimeType's parameters[parameterName] to parameterValue.
		if(
			parameterName.length !== 0                     &&
			HTTP_TOKEN_CODEPOINTS.test(parameterName)      &&
			isHTTPQuotedStringTokenString(parameterValue)  &&
			!mimeType.parameters.has(parameterName)
		){
			mimeType.parameters.set(parameterName, parameterValue);
		}
	}

	// mimesniff.spec.12. Return mimeType.
	return mimeType;
}

/*
 * Remove HTTP whitespace (LF, CR, TAB, SPACE) from the start and/or end of a string.
 *
 * @param  {string}  str
 * @param  {boolean} leading  - Strip from the start (default true).
 * @param  {boolean} trailing - Strip from the end (default true).
 * @return {string}
 */
function removeHTTPWhitespace(str, leading = true, trailing = true){

	let start = 0;
	let end   = str.length;

	if(leading){
		while(start < end && HTTP_WHITESPACE_REGEX.test(str[start]))
			start++;
	}

	if(trailing){
		while(end > start && HTTP_WHITESPACE_REGEX.test(str[end - 1]))
			end--;
	}

	return str.slice(start, end);
}

/*
 * Tell whether a string solely contains HTTP quoted-string token code points
 * (U+0009 TAB, U+0020 to U+007E, U+0080 to U+00FF). The empty string qualifies.
 * url: https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
 *
 * @param  {string} value
 * @return {boolean}
 */
function isHTTPQuotedStringTokenString(value){
	return /^[\u0009\u0020-\u007E\u0080-\u00FF]*$/.test(value);
}

/*
 * ASCII lowercase: only A-Z are mapped to a-z, every other code point is kept.
 * url: https://infra.spec.whatwg.org/#ascii-lowercase
 *
 * @param  {string} str
 * @return {string}
 */
function asciiLowercase(str){
	return str.replace(/[A-Z]/g, (upper) => upper.toLowerCase());
}

/*
 * forgiving-base64 decode
 * url: https://infra.spec.whatwg.org/#forgiving-base64-decode
 *
 * @param  {string} data - Base64 text; ASCII whitespace and up to two trailing "=" are tolerated.
 * @return {Uint8Array|string} The decoded bytes, or the string 'failure'.
 */
function forgivingBase64 (data){

	// infra.spec.1. Drop every ASCII whitespace code point (TAB, LF, FF, CR, SPACE).
	let cleaned = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '');

	// infra.spec.2. When the length is a multiple of 4, strip one or two
	//               trailing U+003D (=) padding code points.
	if(cleaned.length % 4 === 0)
		cleaned = cleaned.replace(/=?=$/, '');

	// infra.spec.3. A length that leaves remainder 1 can never be valid base64.
	// infra.spec.4. Only "+", "/" and ASCII alphanumerics may remain.
	const hasImpossibleLength = cleaned.length % 4 === 1;

	if(hasImpossibleLength || /[^+/0-9A-Za-z]/.test(cleaned))
		return 'failure';

	// atob yields a string with one character per decoded byte; copy the
	// character codes into a byte array.
	const decoded = atob(cleaned);

	return Uint8Array.from(decoded, (char) => char.charCodeAt(0));
}

/*
 * collect an HTTP quoted string
 * url: https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
 *
 * @param  {string} input
 * @param  {{ position: number }} position - Must point at a U+0022 ("); advanced in place.
 * @param  {boolean?} extractValue - When truthy the unescaped content is returned,
 *                                   otherwise the raw slice of input that was consumed.
 * @return {string}
 */
function collectAnHTTPQuotedString (input, position, extractValue) {

	// fetch.spec.1-2. Remember the starting point; the unescaped value starts empty.
	const start = position.position;
	let collected = '';

	// fetch.spec.3-4. The caller must hand over a position on the opening
	//                 quote; step past it.
	assert(input[position.position] === '"');
	position.position += 1;

	// Code points that can be copied through without special treatment.
	const isPlain = (char) => char !== '"' && char !== '\\';

	// fetch.spec.5. Consume plain runs and escapes until the closing quote
	//               or the end of input.
	for(;;){
		// fetch.spec.5.1. Copy everything up to the next quote or backslash.
		collected += collectASequenceOfCodePoints(isPlain, input, position);

		// fetch.spec.5.2. Unterminated string: stop at the end of input.
		if(position.position >= input.length)
			break;

		// fetch.spec.5.3-5.4. Read the quote or backslash and step past it.
		const delimiter = input[position.position];
		position.position += 1;

		// fetch.spec.5.6. Anything but a backslash must be the closing quote.
		if(delimiter !== '\\'){
			assert(delimiter === '"');
			break;
		}

		// fetch.spec.5.5.1. A backslash as the very last code point is kept literally.
		if(position.position >= input.length){
			collected += '\\';
			break;
		}

		// fetch.spec.5.5.2-5.5.3. Escaped code point: take it as-is and move on.
		collected += input[position.position];
		position.position += 1;
	}

	// fetch.spec.6. With the extract-value flag, return the unescaped value;
	// fetch.spec.7. otherwise the code points from the start up to position.
	return extractValue
		? collected
		: input.slice(start, position.position);
}

/**
 * serialize a MIME type
 * url: https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
 *
 * @func   serializeAMimeType
 * @static
 * @param  {object} mimeType - MIME type record.
 * @return {string} Serialized MIME type record.
 * @desc   Serialize a MIME type, as defined by {@link https://mimesniff.spec.whatwg.org/#serialize-a-mime-type WHATWG MIME Sniffing Living Standard}.
 */
export function serializeAMimeType(mimeType){

	// A failed parse result cannot be serialized.
	assert(mimeType !== 'failure');

	const { type, subtype, parameters } = mimeType;

	// mimesniff.spec.1. Start with "type/subtype".
	let serialization = `${type}/${subtype}`;

	// mimesniff.spec.2. Append ";name=value" for every parameter, in order.
	for(const [name, rawValue] of parameters.entries()){

		// mimesniff.spec.2.4. A value that is empty or not a plain HTTP token
		//                     has its U+0022 (") and U+005C (\) escaped with a
		//                     backslash and is then wrapped in double quotes.
		const value = isValidHTTPToken(rawValue)
			? rawValue
			: '"' + rawValue.replace(/(\\|")/g, '\\$1') + '"';

		// mimesniff.spec.2.1-2.3, 2.5. ";" + name + "=" + value.
		serialization += ';' + name + '=' + value;
	}

	// mimesniff.spec.3. Return serialization.
	return serialization;
}

// Inputs shorter than this are decoded with a single spread call to
// String.fromCharCode; longer ones are decoded one byte at a time.
const MAXIMUM_ARGUMENT_LENGTH = 65535;

/*
 * isomorphic decode
 * url: https://infra.spec.whatwg.org/#isomorphic-decode
 *
 * Returns a string with exactly one code point per input byte, each code point
 * having the same value as the corresponding byte, in the same order.
 *
 * @param  {number[]|Uint8Array} input
 * @return {string}
 */
function isomorphicDecode(input){

	// Short input: one call with every byte as an argument.
	if(input.length < MAXIMUM_ARGUMENT_LENGTH){
		return String.fromCharCode(...input);
	}

	// Long input: build the string byte by byte instead of spreading.
	let decoded = '';

	for(const byte of input){
		decoded += String.fromCharCode(byte);
	}

	return decoded;
}

// RFC 7230, Section 3.2.6.
// https://github.com/chromium/chromium/blob/d7da0240cae77824d1eda25745c4022757499131/third_party/blink/renderer/platform/network/http_parsers.cc#L321

/*
 * Tell whether a value is a non-empty string in which every UTF-16 code unit
 * is at most 0x7f and is accepted by isTokenChar.
 *
 * @param  {*} characters - Candidate token; anything that is not a string is rejected.
 * @return {boolean}
 */
function isValidHTTPToken(characters){

	// Non-strings and the empty string are never valid tokens.
	if(typeof characters !== 'string' || characters.length === 0)
		return false;

	let index = 0;

	while(index < characters.length){
		const code = characters.charCodeAt(index);

		// Anything outside ASCII disqualifies the value immediately.
		if(code > 0x7f)
			return false;

		if(!isTokenChar(code))
			return false;

		index += 1;
	}

	return true;
}

/*
 * Tell whether a character is an RFC 7230 (Section 3.2.6) token character:
 * a visible ASCII character (0x21 to 0x7e) that is not one of the delimiters
 * " ( ) , / : ; < = > ? @ [ \ ] { }
 *
 * @param  {number|string} c - A UTF-16 code unit value (as returned by
 *                             String.prototype.charCodeAt), or a one-character string.
 * @return {boolean} true when c is a token character.
 */
function isTokenChar(c){

	// Callers pass char codes; comparing those against string literals with
	// === can never match, so everything is normalised to a number first.
	const code = typeof c === 'string' ? c.charCodeAt(0) : c;

	switch(code){
		case 0x22: // "
		case 0x28: // (
		case 0x29: // )
		case 0x2c: // ,
		case 0x2f: // /
		case 0x3a: // :
		case 0x3b: // ;
		case 0x3c: // <
		case 0x3d: // =
		case 0x3e: // >
		case 0x3f: // ?
		case 0x40: // @
		case 0x5b: // [
		case 0x5c: // \
		case 0x5d: // ]
		case 0x7b: // {
		case 0x7d: // }
			return false;

		default:
			// Control characters, SPACE, DEL and everything non-ASCII are excluded.
			return code > 0x20 && code < 0x7f;
	}
}