313 lines
9.7 KiB
JavaScript
313 lines
9.7 KiB
JavaScript
import { Schemas } from '../common/network.js';
|
|
import { reset } from './dom.js';
|
|
import purify from './dompurify/dompurify.js';
|
|
|
|
/*---------------------------------------------------------------------------------------------
|
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
|
*--------------------------------------------------------------------------------------------*/
|
|
/**
|
|
* List of safe, non-input html tags.
|
|
*/
|
|
const basicMarkupHtmlTags = Object.freeze([
|
|
'a',
|
|
'abbr',
|
|
'b',
|
|
'bdo',
|
|
'blockquote',
|
|
'br',
|
|
'caption',
|
|
'cite',
|
|
'code',
|
|
'col',
|
|
'colgroup',
|
|
'dd',
|
|
'del',
|
|
'details',
|
|
'dfn',
|
|
'div',
|
|
'dl',
|
|
'dt',
|
|
'em',
|
|
'figcaption',
|
|
'figure',
|
|
'h1',
|
|
'h2',
|
|
'h3',
|
|
'h4',
|
|
'h5',
|
|
'h6',
|
|
'hr',
|
|
'i',
|
|
'img',
|
|
'ins',
|
|
'kbd',
|
|
'label',
|
|
'li',
|
|
'mark',
|
|
'ol',
|
|
'p',
|
|
'pre',
|
|
'q',
|
|
'rp',
|
|
'rt',
|
|
'ruby',
|
|
's',
|
|
'samp',
|
|
'small',
|
|
'small',
|
|
'source',
|
|
'span',
|
|
'strike',
|
|
'strong',
|
|
'sub',
|
|
'summary',
|
|
'sup',
|
|
'table',
|
|
'tbody',
|
|
'td',
|
|
'tfoot',
|
|
'th',
|
|
'thead',
|
|
'time',
|
|
'tr',
|
|
'tt',
|
|
'u',
|
|
'ul',
|
|
'var',
|
|
'video',
|
|
'wbr',
|
|
]);
|
|
const defaultAllowedAttrs = Object.freeze([
|
|
'href',
|
|
'target',
|
|
'src',
|
|
'alt',
|
|
'title',
|
|
'for',
|
|
'name',
|
|
'role',
|
|
'tabindex',
|
|
'x-dispatch',
|
|
'required',
|
|
'checked',
|
|
'placeholder',
|
|
'type',
|
|
'start',
|
|
'width',
|
|
'height',
|
|
'align',
|
|
]);
|
|
const fakeRelativeUrlProtocol = 'vscode-relative-path';
|
|
function validateLink(value, allowedProtocols) {
|
|
if (allowedProtocols.override === '*') {
|
|
return true; // allow all protocols
|
|
}
|
|
try {
|
|
const url = new URL(value, fakeRelativeUrlProtocol + '://');
|
|
if (allowedProtocols.override.includes(url.protocol.replace(/:$/, ''))) {
|
|
return true;
|
|
}
|
|
if (allowedProtocols.allowRelativePaths
|
|
&& url.protocol === fakeRelativeUrlProtocol + ':'
|
|
&& !value.trim().toLowerCase().startsWith(fakeRelativeUrlProtocol)) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
catch (e) {
|
|
return false;
|
|
}
|
|
}
|
|
/**
|
|
* Hooks dompurify using `afterSanitizeAttributes` to check that all `href` and `src`
|
|
* attributes are valid.
|
|
*/
|
|
function hookDomPurifyHrefAndSrcSanitizer(allowedLinkProtocols, allowedMediaProtocols) {
|
|
purify.addHook('afterSanitizeAttributes', (node) => {
|
|
// check all href/src attributes for validity
|
|
for (const attr of ['href', 'src']) {
|
|
if (node.hasAttribute(attr)) {
|
|
const attrValue = node.getAttribute(attr);
|
|
if (attr === 'href') {
|
|
if (!attrValue.startsWith('#') && !validateLink(attrValue, allowedLinkProtocols)) {
|
|
node.removeAttribute(attr);
|
|
}
|
|
}
|
|
else { // 'src'
|
|
if (!validateLink(attrValue, allowedMediaProtocols)) {
|
|
node.removeAttribute(attr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
});
|
|
}
|
|
const defaultDomPurifyConfig = Object.freeze({
|
|
ALLOWED_TAGS: [...basicMarkupHtmlTags],
|
|
ALLOWED_ATTR: [...defaultAllowedAttrs],
|
|
// We sanitize the src/href attributes later if needed
|
|
ALLOW_UNKNOWN_PROTOCOLS: true,
|
|
});
|
|
/**
|
|
* Sanitizes an html string.
|
|
*
|
|
* @param untrusted The HTML string to sanitize.
|
|
* @param config Optional configuration for sanitization. If not provided, defaults to a safe configuration.
|
|
*
|
|
* @returns A sanitized string of html.
|
|
*/
|
|
function sanitizeHtml(untrusted, config) {
|
|
return doSanitizeHtml(untrusted, config, 'trusted');
|
|
}
|
|
function doSanitizeHtml(untrusted, config, outputType) {
|
|
try {
|
|
const resolvedConfig = { ...defaultDomPurifyConfig };
|
|
if (config?.allowedTags) {
|
|
if (config.allowedTags.override) {
|
|
resolvedConfig.ALLOWED_TAGS = [...config.allowedTags.override];
|
|
}
|
|
if (config.allowedTags.augment) {
|
|
resolvedConfig.ALLOWED_TAGS = [...(resolvedConfig.ALLOWED_TAGS ?? []), ...config.allowedTags.augment];
|
|
}
|
|
}
|
|
let resolvedAttributes = [...defaultAllowedAttrs];
|
|
if (config?.allowedAttributes) {
|
|
if (config.allowedAttributes.override) {
|
|
resolvedAttributes = [...config.allowedAttributes.override];
|
|
}
|
|
if (config.allowedAttributes.augment) {
|
|
resolvedAttributes = [...resolvedAttributes, ...config.allowedAttributes.augment];
|
|
}
|
|
}
|
|
// All attr names are lower-case in the sanitizer hooks
|
|
resolvedAttributes = resolvedAttributes.map((attr) => {
|
|
if (typeof attr === 'string') {
|
|
return attr.toLowerCase();
|
|
}
|
|
return {
|
|
attributeName: attr.attributeName.toLowerCase(),
|
|
shouldKeep: attr.shouldKeep,
|
|
};
|
|
});
|
|
const allowedAttrNames = new Set(resolvedAttributes.map(attr => typeof attr === 'string' ? attr : attr.attributeName));
|
|
const allowedAttrPredicates = new Map();
|
|
for (const attr of resolvedAttributes) {
|
|
if (typeof attr === 'string') {
|
|
// New string attribute value clears previously set predicates
|
|
allowedAttrPredicates.delete(attr);
|
|
}
|
|
else {
|
|
allowedAttrPredicates.set(attr.attributeName, attr);
|
|
}
|
|
}
|
|
resolvedConfig.ALLOWED_ATTR = Array.from(allowedAttrNames);
|
|
hookDomPurifyHrefAndSrcSanitizer({
|
|
override: config?.allowedLinkProtocols?.override ?? [Schemas.http, Schemas.https],
|
|
allowRelativePaths: config?.allowRelativeLinkPaths ?? false
|
|
}, {
|
|
override: config?.allowedMediaProtocols?.override ?? [Schemas.http, Schemas.https],
|
|
allowRelativePaths: config?.allowRelativeMediaPaths ?? false
|
|
});
|
|
if (config?.replaceWithPlaintext) {
|
|
purify.addHook('uponSanitizeElement', replaceWithPlainTextHook);
|
|
}
|
|
if (allowedAttrPredicates.size) {
|
|
purify.addHook('uponSanitizeAttribute', (node, e) => {
|
|
const predicate = allowedAttrPredicates.get(e.attrName);
|
|
if (predicate) {
|
|
const result = predicate.shouldKeep(node, e);
|
|
if (typeof result === 'string') {
|
|
e.keepAttr = true;
|
|
e.attrValue = result;
|
|
}
|
|
else {
|
|
e.keepAttr = result;
|
|
}
|
|
}
|
|
else {
|
|
e.keepAttr = allowedAttrNames.has(e.attrName);
|
|
}
|
|
});
|
|
}
|
|
if (outputType === 'dom') {
|
|
return purify.sanitize(untrusted, {
|
|
...resolvedConfig,
|
|
RETURN_DOM_FRAGMENT: true
|
|
});
|
|
}
|
|
else {
|
|
return purify.sanitize(untrusted, {
|
|
...resolvedConfig,
|
|
RETURN_TRUSTED_TYPE: true
|
|
}); // Cast from lib TrustedHTML to global TrustedHTML
|
|
}
|
|
}
|
|
finally {
|
|
purify.removeAllHooks();
|
|
}
|
|
}
|
|
const selfClosingTags = ['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
|
|
const replaceWithPlainTextHook = (node, data, _config) => {
|
|
if (!data.allowedTags[data.tagName] && data.tagName !== 'body') {
|
|
const replacement = convertTagToPlaintext(node);
|
|
if (replacement) {
|
|
if (node.nodeType === Node.COMMENT_NODE) {
|
|
// Workaround for https://github.com/cure53/DOMPurify/issues/1005
|
|
// The comment will be deleted in the next phase. However if we try to remove it now, it will cause
|
|
// an exception. Instead we insert the text node before the comment.
|
|
node.parentElement?.insertBefore(replacement, node);
|
|
}
|
|
else {
|
|
node.parentElement?.replaceChild(replacement, node);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
function convertTagToPlaintext(node) {
|
|
if (!node.ownerDocument) {
|
|
return;
|
|
}
|
|
let startTagText;
|
|
let endTagText;
|
|
if (node.nodeType === Node.COMMENT_NODE) {
|
|
startTagText = `<!--${node.textContent}-->`;
|
|
}
|
|
else if (node instanceof Element) {
|
|
const tagName = node.tagName.toLowerCase();
|
|
const isSelfClosing = selfClosingTags.includes(tagName);
|
|
const attrString = node.attributes.length ?
|
|
' ' + Array.from(node.attributes)
|
|
.map(attr => `${attr.name}="${attr.value}"`)
|
|
.join(' ')
|
|
: '';
|
|
startTagText = `<${tagName}${attrString}>`;
|
|
if (!isSelfClosing) {
|
|
endTagText = `</${tagName}>`;
|
|
}
|
|
}
|
|
else {
|
|
return;
|
|
}
|
|
const fragment = document.createDocumentFragment();
|
|
const textNode = node.ownerDocument.createTextNode(startTagText);
|
|
fragment.appendChild(textNode);
|
|
while (node.firstChild) {
|
|
fragment.appendChild(node.firstChild);
|
|
}
|
|
const endTagTextNode = endTagText ? node.ownerDocument.createTextNode(endTagText) : undefined;
|
|
if (endTagTextNode) {
|
|
fragment.appendChild(endTagTextNode);
|
|
}
|
|
return fragment;
|
|
}
|
|
/**
|
|
* Sanitizes the given `value` and reset the given `node` with it.
|
|
*/
|
|
function safeSetInnerHtml(node, untrusted, config) {
|
|
const fragment = doSanitizeHtml(untrusted, config, 'dom');
|
|
reset(node, fragment);
|
|
}
|
|
|
|
export { basicMarkupHtmlTags, convertTagToPlaintext, defaultAllowedAttrs, safeSetInnerHtml, sanitizeHtml };
|