diff --git a/__tests__/redirector.ts b/__tests__/redirector.ts index e0d370178..4a668d0ff 100644 --- a/__tests__/redirector.ts +++ b/__tests__/redirector.ts @@ -105,6 +105,22 @@ describe('GET /r/:postId', () => { .expect(302) .expect('Location', 'http://localhost:5002/posts/p1-p1'); }); + + it('should not escape already encoded URL', async () => { + await con + .getRepository(ArticlePost) + .update( + { id: 'p1' }, + { url: 'http://p1.com/hello%20world/%f0%9f%9a%80-to-the-🌔' }, + ); + return request(app.server) + .get('/r/p1') + .expect(302) + .expect( + 'Location', + 'http://p1.com/hello%20world/%f0%9f%9a%80-to-the-%F0%9F%8C%94?ref=dailydev', + ); + }); }); describe('GET /:id/profile-image', () => { diff --git a/src/common/encodeurl.ts b/src/common/encodeurl.ts new file mode 100644 index 000000000..c8cb42811 --- /dev/null +++ b/src/common/encodeurl.ts @@ -0,0 +1,52 @@ +/*! + * encodeurl + * Copyright(c) 2016 Douglas Christopher Wilson + * MIT Licensed + */ + +/** + * RegExp to match runs of characters not allowed in an encoded URL or "%" starting an + * invalid escape. NOTE(review): "%" and one hex digit at end of input is left unmatched. + * @private + */ + +const ENCODE_CHARS_REGEXP = + /(?:[^\x21\x23-\x3B\x3D\x3F-\x5F\x61-\x7A\x7C\x7E]|%(?:[^0-9A-Fa-f]|[0-9A-Fa-f][^0-9A-Fa-f]|$))+/g; + +/** + * RegExp to match an unpaired (lone) high or low surrogate code unit. + * @private + */ + +const UNMATCHED_SURROGATE_PAIR_REGEXP = + /(^|[^\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF]([^\uDC00-\uDFFF]|$)/g; + +/** + * Replacement for an unpaired surrogate: U+FFFD, keeping the captured neighbor ($1/$2). + * @private + */ + +const UNMATCHED_SURROGATE_PAIR_REPLACE = '$1\uFFFD$2'; + +/** + * Encode a URL to a percent-encoded form, excluding already-encoded sequences. + * + * This function will take an already-encoded URL and encode all the non-URL + * code points. This function will not encode the "%" character unless it is + * not part of a valid sequence (`%20` will be left as-is, but `%foo` will + * be encoded as `%25foo`). 
+ * + * This encoder is meant to be "safe" and does not throw errors. It will try as + * hard as it can to properly encode the given URL, including replacing any raw, + * unpaired surrogates with the Unicode replacement character prior to + * encoding. + * + * @param url - The URL to encode; it may already contain percent-encoded sequences. + * @returns The URL with every matched non-URL run passed through `encodeURI`. + * @public + */ + +export const encodeUrl = (url: string): string => + String(url) + .replace(UNMATCHED_SURROGATE_PAIR_REGEXP, UNMATCHED_SURROGATE_PAIR_REPLACE) + .replace(ENCODE_CHARS_REGEXP, encodeURI); diff --git a/src/routes/redirector.ts b/src/routes/redirector.ts index c26179248..191b59c0e 100644 --- a/src/routes/redirector.ts +++ b/src/routes/redirector.ts @@ -3,6 +3,7 @@ import { FastifyInstance } from 'fastify'; import { ArticlePost, Post } from '../entity'; import { getDiscussionLink, notifyView } from '../common'; import createOrGetConnection from '../db'; +import { encodeUrl } from '../common/encodeurl'; export default async function (fastify: FastifyInstance): Promise { fastify.get<{ Params: { postId: string }; Querystring: { a?: string } }>( @@ -35,7 +36,7 @@ export default async function (fastify: FastifyInstance): Promise { } const url = new URL(post.url); url.searchParams.append('ref', 'dailydev'); - const encodedUri = encodeURI(url.href); + const encodedUri = encodeUrl(url.href); if (req.isBot) { return res.status(302).redirect(encodedUri); }