Use QString literals

This patch covers src/gui and some leftovers from the previous commit.
This commit is contained in:
Chocobo1
2022-03-12 22:00:58 +08:00
parent 5341478036
commit 802ec5a14e
59 changed files with 912 additions and 902 deletions

View File

@@ -40,6 +40,7 @@
#include <QXmlStreamEntityResolver>
#include <QXmlStreamReader>
#include "base/global.h"
#include "rss_article.h"
namespace
@@ -55,303 +56,303 @@ namespace
// http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
static const QHash<QString, QString> HTMLEntities
{
{"nbsp", "&#160;"}, // no-break space = non-breaking space, U+00A0 ISOnum
{"iexcl", "&#161;"}, // inverted exclamation mark, U+00A1 ISOnum
{"cent", "&#162;"}, // cent sign, U+00A2 ISOnum
{"pound", "&#163;"}, // pound sign, U+00A3 ISOnum
{"curren", "&#164;"}, // currency sign, U+00A4 ISOnum
{"yen", "&#165;"}, // yen sign = yuan sign, U+00A5 ISOnum
{"brvbar", "&#166;"}, // broken bar = broken vertical bar, U+00A6 ISOnum
{"sect", "&#167;"}, // section sign, U+00A7 ISOnum
{"uml", "&#168;"}, // diaeresis = spacing diaeresis, U+00A8 ISOdia
{"copy", "&#169;"}, // copyright sign, U+00A9 ISOnum
{"ordf", "&#170;"}, // feminine ordinal indicator, U+00AA ISOnum
{"laquo", "&#171;"}, // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
{"not", "&#172;"}, // not sign = angled dash, U+00AC ISOnum
{"shy", "&#173;"}, // soft hyphen = discretionary hyphen, U+00AD ISOnum
{"reg", "&#174;"}, // registered sign = registered trade mark sign, U+00AE ISOnum
{"macr", "&#175;"}, // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
{"deg", "&#176;"}, // degree sign, U+00B0 ISOnum
{"plusmn", "&#177;"}, // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
{"sup2", "&#178;"}, // superscript two = superscript digit two = squared, U+00B2 ISOnum
{"sup3", "&#179;"}, // superscript three = superscript digit three = cubed, U+00B3 ISOnum
{"acute", "&#180;"}, // acute accent = spacing acute, U+00B4 ISOdia
{"micro", "&#181;"}, // micro sign, U+00B5 ISOnum
{"para", "&#182;"}, // pilcrow sign = paragraph sign, U+00B6 ISOnum
{"middot", "&#183;"}, // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
{"cedil", "&#184;"}, // cedilla = spacing cedilla, U+00B8 ISOdia
{"sup1", "&#185;"}, // superscript one = superscript digit one, U+00B9 ISOnum
{"ordm", "&#186;"}, // masculine ordinal indicator, U+00BA ISOnum
{"raquo", "&#187;"}, // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
{"frac14", "&#188;"}, // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
{"frac12", "&#189;"}, // vulgar fraction one half = fraction one half, U+00BD ISOnum
{"frac34", "&#190;"}, // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
{"iquest", "&#191;"}, // inverted question mark = turned question mark, U+00BF ISOnum
{"Agrave", "&#192;"}, // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
{"Aacute", "&#193;"}, // latin capital letter A with acute, U+00C1 ISOlat1
{"Acirc", "&#194;"}, // latin capital letter A with circumflex, U+00C2 ISOlat1
{"Atilde", "&#195;"}, // latin capital letter A with tilde, U+00C3 ISOlat1
{"Auml", "&#196;"}, // latin capital letter A with diaeresis, U+00C4 ISOlat1
{"Aring", "&#197;"}, // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
{"AElig", "&#198;"}, // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
{"Ccedil", "&#199;"}, // latin capital letter C with cedilla, U+00C7 ISOlat1
{"Egrave", "&#200;"}, // latin capital letter E with grave, U+00C8 ISOlat1
{"Eacute", "&#201;"}, // latin capital letter E with acute, U+00C9 ISOlat1
{"Ecirc", "&#202;"}, // latin capital letter E with circumflex, U+00CA ISOlat1
{"Euml", "&#203;"}, // latin capital letter E with diaeresis, U+00CB ISOlat1
{"Igrave", "&#204;"}, // latin capital letter I with grave, U+00CC ISOlat1
{"Iacute", "&#205;"}, // latin capital letter I with acute, U+00CD ISOlat1
{"Icirc", "&#206;"}, // latin capital letter I with circumflex, U+00CE ISOlat1
{"Iuml", "&#207;"}, // latin capital letter I with diaeresis, U+00CF ISOlat1
{"ETH", "&#208;"}, // latin capital letter ETH, U+00D0 ISOlat1
{"Ntilde", "&#209;"}, // latin capital letter N with tilde, U+00D1 ISOlat1
{"Ograve", "&#210;"}, // latin capital letter O with grave, U+00D2 ISOlat1
{"Oacute", "&#211;"}, // latin capital letter O with acute, U+00D3 ISOlat1
{"Ocirc", "&#212;"}, // latin capital letter O with circumflex, U+00D4 ISOlat1
{"Otilde", "&#213;"}, // latin capital letter O with tilde, U+00D5 ISOlat1
{"Ouml", "&#214;"}, // latin capital letter O with diaeresis, U+00D6 ISOlat1
{"times", "&#215;"}, // multiplication sign, U+00D7 ISOnum
{"Oslash", "&#216;"}, // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
{"Ugrave", "&#217;"}, // latin capital letter U with grave, U+00D9 ISOlat1
{"Uacute", "&#218;"}, // latin capital letter U with acute, U+00DA ISOlat1
{"Ucirc", "&#219;"}, // latin capital letter U with circumflex, U+00DB ISOlat1
{"Uuml", "&#220;"}, // latin capital letter U with diaeresis, U+00DC ISOlat1
{"Yacute", "&#221;"}, // latin capital letter Y with acute, U+00DD ISOlat1
{"THORN", "&#222;"}, // latin capital letter THORN, U+00DE ISOlat1
{"szlig", "&#223;"}, // latin small letter sharp s = ess-zed, U+00DF ISOlat1
{"agrave", "&#224;"}, // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
{"aacute", "&#225;"}, // latin small letter a with acute, U+00E1 ISOlat1
{"acirc", "&#226;"}, // latin small letter a with circumflex, U+00E2 ISOlat1
{"atilde", "&#227;"}, // latin small letter a with tilde, U+00E3 ISOlat1
{"auml", "&#228;"}, // latin small letter a with diaeresis, U+00E4 ISOlat1
{"aring", "&#229;"}, // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
{"aelig", "&#230;"}, // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
{"ccedil", "&#231;"}, // latin small letter c with cedilla, U+00E7 ISOlat1
{"egrave", "&#232;"}, // latin small letter e with grave, U+00E8 ISOlat1
{"eacute", "&#233;"}, // latin small letter e with acute, U+00E9 ISOlat1
{"ecirc", "&#234;"}, // latin small letter e with circumflex, U+00EA ISOlat1
{"euml", "&#235;"}, // latin small letter e with diaeresis, U+00EB ISOlat1
{"igrave", "&#236;"}, // latin small letter i with grave, U+00EC ISOlat1
{"iacute", "&#237;"}, // latin small letter i with acute, U+00ED ISOlat1
{"icirc", "&#238;"}, // latin small letter i with circumflex, U+00EE ISOlat1
{"iuml", "&#239;"}, // latin small letter i with diaeresis, U+00EF ISOlat1
{"eth", "&#240;"}, // latin small letter eth, U+00F0 ISOlat1
{"ntilde", "&#241;"}, // latin small letter n with tilde, U+00F1 ISOlat1
{"ograve", "&#242;"}, // latin small letter o with grave, U+00F2 ISOlat1
{"oacute", "&#243;"}, // latin small letter o with acute, U+00F3 ISOlat1
{"ocirc", "&#244;"}, // latin small letter o with circumflex, U+00F4 ISOlat1
{"otilde", "&#245;"}, // latin small letter o with tilde, U+00F5 ISOlat1
{"ouml", "&#246;"}, // latin small letter o with diaeresis, U+00F6 ISOlat1
{"divide", "&#247;"}, // division sign, U+00F7 ISOnum
{"oslash", "&#248;"}, // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
{"ugrave", "&#249;"}, // latin small letter u with grave, U+00F9 ISOlat1
{"uacute", "&#250;"}, // latin small letter u with acute, U+00FA ISOlat1
{"ucirc", "&#251;"}, // latin small letter u with circumflex, U+00FB ISOlat1
{"uuml", "&#252;"}, // latin small letter u with diaeresis, U+00FC ISOlat1
{"yacute", "&#253;"}, // latin small letter y with acute, U+00FD ISOlat1
{"thorn", "&#254;"}, // latin small letter thorn, U+00FE ISOlat1
{"yuml", "&#255;"}, // latin small letter y with diaeresis, U+00FF ISOlat1
{u"nbsp"_qs, u"&#160;"_qs}, // no-break space = non-breaking space, U+00A0 ISOnum
{u"iexcl"_qs, u"&#161;"_qs}, // inverted exclamation mark, U+00A1 ISOnum
{u"cent"_qs, u"&#162;"_qs}, // cent sign, U+00A2 ISOnum
{u"pound"_qs, u"&#163;"_qs}, // pound sign, U+00A3 ISOnum
{u"curren"_qs, u"&#164;"_qs}, // currency sign, U+00A4 ISOnum
{u"yen"_qs, u"&#165;"_qs}, // yen sign = yuan sign, U+00A5 ISOnum
{u"brvbar"_qs, u"&#166;"_qs}, // broken bar = broken vertical bar, U+00A6 ISOnum
{u"sect"_qs, u"&#167;"_qs}, // section sign, U+00A7 ISOnum
{u"uml"_qs, u"&#168;"_qs}, // diaeresis = spacing diaeresis, U+00A8 ISOdia
{u"copy"_qs, u"&#169;"_qs}, // copyright sign, U+00A9 ISOnum
{u"ordf"_qs, u"&#170;"_qs}, // feminine ordinal indicator, U+00AA ISOnum
{u"laquo"_qs, u"&#171;"_qs}, // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
{u"not"_qs, u"&#172;"_qs}, // not sign = angled dash, U+00AC ISOnum
{u"shy"_qs, u"&#173;"_qs}, // soft hyphen = discretionary hyphen, U+00AD ISOnum
{u"reg"_qs, u"&#174;"_qs}, // registered sign = registered trade mark sign, U+00AE ISOnum
{u"macr"_qs, u"&#175;"_qs}, // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
{u"deg"_qs, u"&#176;"_qs}, // degree sign, U+00B0 ISOnum
{u"plusmn"_qs, u"&#177;"_qs}, // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
{u"sup2"_qs, u"&#178;"_qs}, // superscript two = superscript digit two = squared, U+00B2 ISOnum
{u"sup3"_qs, u"&#179;"_qs}, // superscript three = superscript digit three = cubed, U+00B3 ISOnum
{u"acute"_qs, u"&#180;"_qs}, // acute accent = spacing acute, U+00B4 ISOdia
{u"micro"_qs, u"&#181;"_qs}, // micro sign, U+00B5 ISOnum
{u"para"_qs, u"&#182;"_qs}, // pilcrow sign = paragraph sign, U+00B6 ISOnum
{u"middot"_qs, u"&#183;"_qs}, // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
{u"cedil"_qs, u"&#184;"_qs}, // cedilla = spacing cedilla, U+00B8 ISOdia
{u"sup1"_qs, u"&#185;"_qs}, // superscript one = superscript digit one, U+00B9 ISOnum
{u"ordm"_qs, u"&#186;"_qs}, // masculine ordinal indicator, U+00BA ISOnum
{u"raquo"_qs, u"&#187;"_qs}, // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
{u"frac14"_qs, u"&#188;"_qs}, // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
{u"frac12"_qs, u"&#189;"_qs}, // vulgar fraction one half = fraction one half, U+00BD ISOnum
{u"frac34"_qs, u"&#190;"_qs}, // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
{u"iquest"_qs, u"&#191;"_qs}, // inverted question mark = turned question mark, U+00BF ISOnum
{u"Agrave"_qs, u"&#192;"_qs}, // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
{u"Aacute"_qs, u"&#193;"_qs}, // latin capital letter A with acute, U+00C1 ISOlat1
{u"Acirc"_qs, u"&#194;"_qs}, // latin capital letter A with circumflex, U+00C2 ISOlat1
{u"Atilde"_qs, u"&#195;"_qs}, // latin capital letter A with tilde, U+00C3 ISOlat1
{u"Auml"_qs, u"&#196;"_qs}, // latin capital letter A with diaeresis, U+00C4 ISOlat1
{u"Aring"_qs, u"&#197;"_qs}, // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
{u"AElig"_qs, u"&#198;"_qs}, // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
{u"Ccedil"_qs, u"&#199;"_qs}, // latin capital letter C with cedilla, U+00C7 ISOlat1
{u"Egrave"_qs, u"&#200;"_qs}, // latin capital letter E with grave, U+00C8 ISOlat1
{u"Eacute"_qs, u"&#201;"_qs}, // latin capital letter E with acute, U+00C9 ISOlat1
{u"Ecirc"_qs, u"&#202;"_qs}, // latin capital letter E with circumflex, U+00CA ISOlat1
{u"Euml"_qs, u"&#203;"_qs}, // latin capital letter E with diaeresis, U+00CB ISOlat1
{u"Igrave"_qs, u"&#204;"_qs}, // latin capital letter I with grave, U+00CC ISOlat1
{u"Iacute"_qs, u"&#205;"_qs}, // latin capital letter I with acute, U+00CD ISOlat1
{u"Icirc"_qs, u"&#206;"_qs}, // latin capital letter I with circumflex, U+00CE ISOlat1
{u"Iuml"_qs, u"&#207;"_qs}, // latin capital letter I with diaeresis, U+00CF ISOlat1
{u"ETH"_qs, u"&#208;"_qs}, // latin capital letter ETH, U+00D0 ISOlat1
{u"Ntilde"_qs, u"&#209;"_qs}, // latin capital letter N with tilde, U+00D1 ISOlat1
{u"Ograve"_qs, u"&#210;"_qs}, // latin capital letter O with grave, U+00D2 ISOlat1
{u"Oacute"_qs, u"&#211;"_qs}, // latin capital letter O with acute, U+00D3 ISOlat1
{u"Ocirc"_qs, u"&#212;"_qs}, // latin capital letter O with circumflex, U+00D4 ISOlat1
{u"Otilde"_qs, u"&#213;"_qs}, // latin capital letter O with tilde, U+00D5 ISOlat1
{u"Ouml"_qs, u"&#214;"_qs}, // latin capital letter O with diaeresis, U+00D6 ISOlat1
{u"times"_qs, u"&#215;"_qs}, // multiplication sign, U+00D7 ISOnum
{u"Oslash"_qs, u"&#216;"_qs}, // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
{u"Ugrave"_qs, u"&#217;"_qs}, // latin capital letter U with grave, U+00D9 ISOlat1
{u"Uacute"_qs, u"&#218;"_qs}, // latin capital letter U with acute, U+00DA ISOlat1
{u"Ucirc"_qs, u"&#219;"_qs}, // latin capital letter U with circumflex, U+00DB ISOlat1
{u"Uuml"_qs, u"&#220;"_qs}, // latin capital letter U with diaeresis, U+00DC ISOlat1
{u"Yacute"_qs, u"&#221;"_qs}, // latin capital letter Y with acute, U+00DD ISOlat1
{u"THORN"_qs, u"&#222;"_qs}, // latin capital letter THORN, U+00DE ISOlat1
{u"szlig"_qs, u"&#223;"_qs}, // latin small letter sharp s = ess-zed, U+00DF ISOlat1
{u"agrave"_qs, u"&#224;"_qs}, // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
{u"aacute"_qs, u"&#225;"_qs}, // latin small letter a with acute, U+00E1 ISOlat1
{u"acirc"_qs, u"&#226;"_qs}, // latin small letter a with circumflex, U+00E2 ISOlat1
{u"atilde"_qs, u"&#227;"_qs}, // latin small letter a with tilde, U+00E3 ISOlat1
{u"auml"_qs, u"&#228;"_qs}, // latin small letter a with diaeresis, U+00E4 ISOlat1
{u"aring"_qs, u"&#229;"_qs}, // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
{u"aelig"_qs, u"&#230;"_qs}, // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
{u"ccedil"_qs, u"&#231;"_qs}, // latin small letter c with cedilla, U+00E7 ISOlat1
{u"egrave"_qs, u"&#232;"_qs}, // latin small letter e with grave, U+00E8 ISOlat1
{u"eacute"_qs, u"&#233;"_qs}, // latin small letter e with acute, U+00E9 ISOlat1
{u"ecirc"_qs, u"&#234;"_qs}, // latin small letter e with circumflex, U+00EA ISOlat1
{u"euml"_qs, u"&#235;"_qs}, // latin small letter e with diaeresis, U+00EB ISOlat1
{u"igrave"_qs, u"&#236;"_qs}, // latin small letter i with grave, U+00EC ISOlat1
{u"iacute"_qs, u"&#237;"_qs}, // latin small letter i with acute, U+00ED ISOlat1
{u"icirc"_qs, u"&#238;"_qs}, // latin small letter i with circumflex, U+00EE ISOlat1
{u"iuml"_qs, u"&#239;"_qs}, // latin small letter i with diaeresis, U+00EF ISOlat1
{u"eth"_qs, u"&#240;"_qs}, // latin small letter eth, U+00F0 ISOlat1
{u"ntilde"_qs, u"&#241;"_qs}, // latin small letter n with tilde, U+00F1 ISOlat1
{u"ograve"_qs, u"&#242;"_qs}, // latin small letter o with grave, U+00F2 ISOlat1
{u"oacute"_qs, u"&#243;"_qs}, // latin small letter o with acute, U+00F3 ISOlat1
{u"ocirc"_qs, u"&#244;"_qs}, // latin small letter o with circumflex, U+00F4 ISOlat1
{u"otilde"_qs, u"&#245;"_qs}, // latin small letter o with tilde, U+00F5 ISOlat1
{u"ouml"_qs, u"&#246;"_qs}, // latin small letter o with diaeresis, U+00F6 ISOlat1
{u"divide"_qs, u"&#247;"_qs}, // division sign, U+00F7 ISOnum
{u"oslash"_qs, u"&#248;"_qs}, // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
{u"ugrave"_qs, u"&#249;"_qs}, // latin small letter u with grave, U+00F9 ISOlat1
{u"uacute"_qs, u"&#250;"_qs}, // latin small letter u with acute, U+00FA ISOlat1
{u"ucirc"_qs, u"&#251;"_qs}, // latin small letter u with circumflex, U+00FB ISOlat1
{u"uuml"_qs, u"&#252;"_qs}, // latin small letter u with diaeresis, U+00FC ISOlat1
{u"yacute"_qs, u"&#253;"_qs}, // latin small letter y with acute, U+00FD ISOlat1
{u"thorn"_qs, u"&#254;"_qs}, // latin small letter thorn, U+00FE ISOlat1
{u"yuml"_qs, u"&#255;"_qs}, // latin small letter y with diaeresis, U+00FF ISOlat1
// Latin Extended-A
{"OElig", "&#338;"}, // latin capital ligature OE, U+0152 ISOlat2
{"oelig", "&#339;"}, // latin small ligature oe, U+0153 ISOlat2
{u"OElig"_qs, u"&#338;"_qs}, // latin capital ligature OE, U+0152 ISOlat2
{u"oelig"_qs, u"&#339;"_qs}, // latin small ligature oe, U+0153 ISOlat2
// ligature is a misnomer, this is a separate character in some languages
{"Scaron", "&#352;"}, // latin capital letter S with caron, U+0160 ISOlat2
{"scaron", "&#353;"}, // latin small letter s with caron, U+0161 ISOlat2
{"Yuml", "&#376;"}, // latin capital letter Y with diaeresis, U+0178 ISOlat2
{u"Scaron"_qs, u"&#352;"_qs}, // latin capital letter S with caron, U+0160 ISOlat2
{u"scaron"_qs, u"&#353;"_qs}, // latin small letter s with caron, U+0161 ISOlat2
{u"Yuml"_qs, u"&#376;"_qs}, // latin capital letter Y with diaeresis, U+0178 ISOlat2
// Spacing Modifier Letters
{"circ", "&#710;"}, // modifier letter circumflex accent, U+02C6 ISOpub
{"tilde", "&#732;"}, // small tilde, U+02DC ISOdia
{u"circ"_qs, u"&#710;"_qs}, // modifier letter circumflex accent, U+02C6 ISOpub
{u"tilde"_qs, u"&#732;"_qs}, // small tilde, U+02DC ISOdia
// General Punctuation
{"ensp", "&#8194;"}, // en space, U+2002 ISOpub
{"emsp", "&#8195;"}, // em space, U+2003 ISOpub
{"thinsp", "&#8201;"}, // thin space, U+2009 ISOpub
{"zwnj", "&#8204;"}, // zero width non-joiner, U+200C NEW RFC 2070
{"zwj", "&#8205;"}, // zero width joiner, U+200D NEW RFC 2070
{"lrm", "&#8206;"}, // left-to-right mark, U+200E NEW RFC 2070
{"rlm", "&#8207;"}, // right-to-left mark, U+200F NEW RFC 2070
{"ndash", "&#8211;"}, // en dash, U+2013 ISOpub
{"mdash", "&#8212;"}, // em dash, U+2014 ISOpub
{"lsquo", "&#8216;"}, // left single quotation mark, U+2018 ISOnum
{"rsquo", "&#8217;"}, // right single quotation mark, U+2019 ISOnum
{"sbquo", "&#8218;"}, // single low-9 quotation mark, U+201A NEW
{"ldquo", "&#8220;"}, // left double quotation mark, U+201C ISOnum
{"rdquo", "&#8221;"}, // right double quotation mark, U+201D ISOnum
{"bdquo", "&#8222;"}, // double low-9 quotation mark, U+201E NEW
{"dagger", "&#8224;"}, // dagger, U+2020 ISOpub
{"Dagger", "&#8225;"}, // double dagger, U+2021 ISOpub
{"permil", "&#8240;"}, // per mille sign, U+2030 ISOtech
{"lsaquo", "&#8249;"}, // single left-pointing angle quotation mark, U+2039 ISO proposed
{u"ensp"_qs, u"&#8194;"_qs}, // en space, U+2002 ISOpub
{u"emsp"_qs, u"&#8195;"_qs}, // em space, U+2003 ISOpub
{u"thinsp"_qs, u"&#8201;"_qs}, // thin space, U+2009 ISOpub
{u"zwnj"_qs, u"&#8204;"_qs}, // zero width non-joiner, U+200C NEW RFC 2070
{u"zwj"_qs, u"&#8205;"_qs}, // zero width joiner, U+200D NEW RFC 2070
{u"lrm"_qs, u"&#8206;"_qs}, // left-to-right mark, U+200E NEW RFC 2070
{u"rlm"_qs, u"&#8207;"_qs}, // right-to-left mark, U+200F NEW RFC 2070
{u"ndash"_qs, u"&#8211;"_qs}, // en dash, U+2013 ISOpub
{u"mdash"_qs, u"&#8212;"_qs}, // em dash, U+2014 ISOpub
{u"lsquo"_qs, u"&#8216;"_qs}, // left single quotation mark, U+2018 ISOnum
{u"rsquo"_qs, u"&#8217;"_qs}, // right single quotation mark, U+2019 ISOnum
{u"sbquo"_qs, u"&#8218;"_qs}, // single low-9 quotation mark, U+201A NEW
{u"ldquo"_qs, u"&#8220;"_qs}, // left double quotation mark, U+201C ISOnum
{u"rdquo"_qs, u"&#8221;"_qs}, // right double quotation mark, U+201D ISOnum
{u"bdquo"_qs, u"&#8222;"_qs}, // double low-9 quotation mark, U+201E NEW
{u"dagger"_qs, u"&#8224;"_qs}, // dagger, U+2020 ISOpub
{u"Dagger"_qs, u"&#8225;"_qs}, // double dagger, U+2021 ISOpub
{u"permil"_qs, u"&#8240;"_qs}, // per mille sign, U+2030 ISOtech
{u"lsaquo"_qs, u"&#8249;"_qs}, // single left-pointing angle quotation mark, U+2039 ISO proposed
// lsaquo is proposed but not yet ISO standardized
{"rsaquo", "&#8250;"}, // single right-pointing angle quotation mark, U+203A ISO proposed
{u"rsaquo"_qs, u"&#8250;"_qs}, // single right-pointing angle quotation mark, U+203A ISO proposed
// rsaquo is proposed but not yet ISO standardized
// Currency Symbols
{"euro", "&#8364;"}, // euro sign, U+20AC NEW
{u"euro"_qs, u"&#8364;"_qs}, // euro sign, U+20AC NEW
// Latin Extended-B
{"fnof", "&#402;"}, // latin small letter f with hook = function = florin, U+0192 ISOtech
{u"fnof"_qs, u"&#402;"_qs}, // latin small letter f with hook = function = florin, U+0192 ISOtech
// Greek
{"Alpha", "&#913;"}, // greek capital letter alpha, U+0391
{"Beta", "&#914;"}, // greek capital letter beta, U+0392
{"Gamma", "&#915;"}, // greek capital letter gamma, U+0393 ISOgrk3
{"Delta", "&#916;"}, // greek capital letter delta, U+0394 ISOgrk3
{"Epsilon", "&#917;"}, // greek capital letter epsilon, U+0395
{"Zeta", "&#918;"}, // greek capital letter zeta, U+0396
{"Eta", "&#919;"}, // greek capital letter eta, U+0397
{"Theta", "&#920;"}, // greek capital letter theta, U+0398 ISOgrk3
{"Iota", "&#921;"}, // greek capital letter iota, U+0399
{"Kappa", "&#922;"}, // greek capital letter kappa, U+039A
{"Lambda", "&#923;"}, // greek capital letter lamda, U+039B ISOgrk3
{"Mu", "&#924;"}, // greek capital letter mu, U+039C
{"Nu", "&#925;"}, // greek capital letter nu, U+039D
{"Xi", "&#926;"}, // greek capital letter xi, U+039E ISOgrk3
{"Omicron", "&#927;"}, // greek capital letter omicron, U+039F
{"Pi", "&#928;"}, // greek capital letter pi, U+03A0 ISOgrk3
{"Rho", "&#929;"}, // greek capital letter rho, U+03A1
{"Sigma", "&#931;"}, // greek capital letter sigma, U+03A3 ISOgrk3
{"Tau", "&#932;"}, // greek capital letter tau, U+03A4
{"Upsilon", "&#933;"}, // greek capital letter upsilon, U+03A5 ISOgrk3
{"Phi", "&#934;"}, // greek capital letter phi, U+03A6 ISOgrk3
{"Chi", "&#935;"}, // greek capital letter chi, U+03A7
{"Psi", "&#936;"}, // greek capital letter psi, U+03A8 ISOgrk3
{"Omega", "&#937;"}, // greek capital letter omega, U+03A9 ISOgrk3
{"alpha", "&#945;"}, // greek small letter alpha, U+03B1 ISOgrk3
{"beta", "&#946;"}, // greek small letter beta, U+03B2 ISOgrk3
{"gamma", "&#947;"}, // greek small letter gamma, U+03B3 ISOgrk3
{"delta", "&#948;"}, // greek small letter delta, U+03B4 ISOgrk3
{"epsilon", "&#949;"}, // greek small letter epsilon, U+03B5 ISOgrk3
{"zeta", "&#950;"}, // greek small letter zeta, U+03B6 ISOgrk3
{"eta", "&#951;"}, // greek small letter eta, U+03B7 ISOgrk3
{"theta", "&#952;"}, // greek small letter theta, U+03B8 ISOgrk3
{"iota", "&#953;"}, // greek small letter iota, U+03B9 ISOgrk3
{"kappa", "&#954;"}, // greek small letter kappa, U+03BA ISOgrk3
{"lambda", "&#955;"}, // greek small letter lamda, U+03BB ISOgrk3
{"mu", "&#956;"}, // greek small letter mu, U+03BC ISOgrk3
{"nu", "&#957;"}, // greek small letter nu, U+03BD ISOgrk3
{"xi", "&#958;"}, // greek small letter xi, U+03BE ISOgrk3
{"omicron", "&#959;"}, // greek small letter omicron, U+03BF NEW
{"pi", "&#960;"}, // greek small letter pi, U+03C0 ISOgrk3
{"rho", "&#961;"}, // greek small letter rho, U+03C1 ISOgrk3
{"sigmaf", "&#962;"}, // greek small letter final sigma, U+03C2 ISOgrk3
{"sigma", "&#963;"}, // greek small letter sigma, U+03C3 ISOgrk3
{"tau", "&#964;"}, // greek small letter tau, U+03C4 ISOgrk3
{"upsilon", "&#965;"}, // greek small letter upsilon, U+03C5 ISOgrk3
{"phi", "&#966;"}, // greek small letter phi, U+03C6 ISOgrk3
{"chi", "&#967;"}, // greek small letter chi, U+03C7 ISOgrk3
{"psi", "&#968;"}, // greek small letter psi, U+03C8 ISOgrk3
{"omega", "&#969;"}, // greek small letter omega, U+03C9 ISOgrk3
{"thetasym", "&#977;"}, // greek theta symbol, U+03D1 NEW
{"upsih", "&#978;"}, // greek upsilon with hook symbol, U+03D2 NEW
{"piv", "&#982;"}, // greek pi symbol, U+03D6 ISOgrk3
{u"Alpha"_qs, u"&#913;"_qs}, // greek capital letter alpha, U+0391
{u"Beta"_qs, u"&#914;"_qs}, // greek capital letter beta, U+0392
{u"Gamma"_qs, u"&#915;"_qs}, // greek capital letter gamma, U+0393 ISOgrk3
{u"Delta"_qs, u"&#916;"_qs}, // greek capital letter delta, U+0394 ISOgrk3
{u"Epsilon"_qs, u"&#917;"_qs}, // greek capital letter epsilon, U+0395
{u"Zeta"_qs, u"&#918;"_qs}, // greek capital letter zeta, U+0396
{u"Eta"_qs, u"&#919;"_qs}, // greek capital letter eta, U+0397
{u"Theta"_qs, u"&#920;"_qs}, // greek capital letter theta, U+0398 ISOgrk3
{u"Iota"_qs, u"&#921;"_qs}, // greek capital letter iota, U+0399
{u"Kappa"_qs, u"&#922;"_qs}, // greek capital letter kappa, U+039A
{u"Lambda"_qs, u"&#923;"_qs}, // greek capital letter lamda, U+039B ISOgrk3
{u"Mu"_qs, u"&#924;"_qs}, // greek capital letter mu, U+039C
{u"Nu"_qs, u"&#925;"_qs}, // greek capital letter nu, U+039D
{u"Xi"_qs, u"&#926;"_qs}, // greek capital letter xi, U+039E ISOgrk3
{u"Omicron"_qs, u"&#927;"_qs}, // greek capital letter omicron, U+039F
{u"Pi"_qs, u"&#928;"_qs}, // greek capital letter pi, U+03A0 ISOgrk3
{u"Rho"_qs, u"&#929;"_qs}, // greek capital letter rho, U+03A1
{u"Sigma"_qs, u"&#931;"_qs}, // greek capital letter sigma, U+03A3 ISOgrk3
{u"Tau"_qs, u"&#932;"_qs}, // greek capital letter tau, U+03A4
{u"Upsilon"_qs, u"&#933;"_qs}, // greek capital letter upsilon, U+03A5 ISOgrk3
{u"Phi"_qs, u"&#934;"_qs}, // greek capital letter phi, U+03A6 ISOgrk3
{u"Chi"_qs, u"&#935;"_qs}, // greek capital letter chi, U+03A7
{u"Psi"_qs, u"&#936;"_qs}, // greek capital letter psi, U+03A8 ISOgrk3
{u"Omega"_qs, u"&#937;"_qs}, // greek capital letter omega, U+03A9 ISOgrk3
{u"alpha"_qs, u"&#945;"_qs}, // greek small letter alpha, U+03B1 ISOgrk3
{u"beta"_qs, u"&#946;"_qs}, // greek small letter beta, U+03B2 ISOgrk3
{u"gamma"_qs, u"&#947;"_qs}, // greek small letter gamma, U+03B3 ISOgrk3
{u"delta"_qs, u"&#948;"_qs}, // greek small letter delta, U+03B4 ISOgrk3
{u"epsilon"_qs, u"&#949;"_qs}, // greek small letter epsilon, U+03B5 ISOgrk3
{u"zeta"_qs, u"&#950;"_qs}, // greek small letter zeta, U+03B6 ISOgrk3
{u"eta"_qs, u"&#951;"_qs}, // greek small letter eta, U+03B7 ISOgrk3
{u"theta"_qs, u"&#952;"_qs}, // greek small letter theta, U+03B8 ISOgrk3
{u"iota"_qs, u"&#953;"_qs}, // greek small letter iota, U+03B9 ISOgrk3
{u"kappa"_qs, u"&#954;"_qs}, // greek small letter kappa, U+03BA ISOgrk3
{u"lambda"_qs, u"&#955;"_qs}, // greek small letter lamda, U+03BB ISOgrk3
{u"mu"_qs, u"&#956;"_qs}, // greek small letter mu, U+03BC ISOgrk3
{u"nu"_qs, u"&#957;"_qs}, // greek small letter nu, U+03BD ISOgrk3
{u"xi"_qs, u"&#958;"_qs}, // greek small letter xi, U+03BE ISOgrk3
{u"omicron"_qs, u"&#959;"_qs}, // greek small letter omicron, U+03BF NEW
{u"pi"_qs, u"&#960;"_qs}, // greek small letter pi, U+03C0 ISOgrk3
{u"rho"_qs, u"&#961;"_qs}, // greek small letter rho, U+03C1 ISOgrk3
{u"sigmaf"_qs, u"&#962;"_qs}, // greek small letter final sigma, U+03C2 ISOgrk3
{u"sigma"_qs, u"&#963;"_qs}, // greek small letter sigma, U+03C3 ISOgrk3
{u"tau"_qs, u"&#964;"_qs}, // greek small letter tau, U+03C4 ISOgrk3
{u"upsilon"_qs, u"&#965;"_qs}, // greek small letter upsilon, U+03C5 ISOgrk3
{u"phi"_qs, u"&#966;"_qs}, // greek small letter phi, U+03C6 ISOgrk3
{u"chi"_qs, u"&#967;"_qs}, // greek small letter chi, U+03C7 ISOgrk3
{u"psi"_qs, u"&#968;"_qs}, // greek small letter psi, U+03C8 ISOgrk3
{u"omega"_qs, u"&#969;"_qs}, // greek small letter omega, U+03C9 ISOgrk3
{u"thetasym"_qs, u"&#977;"_qs}, // greek theta symbol, U+03D1 NEW
{u"upsih"_qs, u"&#978;"_qs}, // greek upsilon with hook symbol, U+03D2 NEW
{u"piv"_qs, u"&#982;"_qs}, // greek pi symbol, U+03D6 ISOgrk3
// General Punctuation
{"bull", "&#8226;"}, // bullet = black small circle, U+2022 ISOpub
{u"bull"_qs, u"&#8226;"_qs}, // bullet = black small circle, U+2022 ISOpub
// bullet is NOT the same as bullet operator, U+2219
{"hellip", "&#8230;"}, // horizontal ellipsis = three dot leader, U+2026 ISOpub
{"prime", "&#8242;"}, // prime = minutes = feet, U+2032 ISOtech
{"Prime", "&#8243;"}, // double prime = seconds = inches, U+2033 ISOtech
{"oline", "&#8254;"}, // overline = spacing overscore, U+203E NEW
{"frasl", "&#8260;"}, // fraction slash, U+2044 NEW
{u"hellip"_qs, u"&#8230;"_qs}, // horizontal ellipsis = three dot leader, U+2026 ISOpub
{u"prime"_qs, u"&#8242;"_qs}, // prime = minutes = feet, U+2032 ISOtech
{u"Prime"_qs, u"&#8243;"_qs}, // double prime = seconds = inches, U+2033 ISOtech
{u"oline"_qs, u"&#8254;"_qs}, // overline = spacing overscore, U+203E NEW
{u"frasl"_qs, u"&#8260;"_qs}, // fraction slash, U+2044 NEW
// Letterlike Symbols
{"weierp", "&#8472;"}, // script capital P = power set = Weierstrass p, U+2118 ISOamso
{"image", "&#8465;"}, // black-letter capital I = imaginary part, U+2111 ISOamso
{"real", "&#8476;"}, // black-letter capital R = real part symbol, U+211C ISOamso
{"trade", "&#8482;"}, // trade mark sign, U+2122 ISOnum
{"alefsym", "&#8501;"}, // alef symbol = first transfinite cardinal, U+2135 NEW
{u"weierp"_qs, u"&#8472;"_qs}, // script capital P = power set = Weierstrass p, U+2118 ISOamso
{u"image"_qs, u"&#8465;"_qs}, // black-letter capital I = imaginary part, U+2111 ISOamso
{u"real"_qs, u"&#8476;"_qs}, // black-letter capital R = real part symbol, U+211C ISOamso
{u"trade"_qs, u"&#8482;"_qs}, // trade mark sign, U+2122 ISOnum
{u"alefsym"_qs, u"&#8501;"_qs}, // alef symbol = first transfinite cardinal, U+2135 NEW
// alef symbol is NOT the same as hebrew letter alef,
// U+05D0 although the same glyph could be used to depict both characters
// Arrows
{"larr", "&#8592;"}, // leftwards arrow, U+2190 ISOnum
{"uarr", "&#8593;"}, // upwards arrow, U+2191 ISOnum
{"rarr", "&#8594;"}, // rightwards arrow, U+2192 ISOnum
{"darr", "&#8595;"}, // downwards arrow, U+2193 ISOnum
{"harr", "&#8596;"}, // left right arrow, U+2194 ISOamsa
{"crarr", "&#8629;"}, // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
{"lArr", "&#8656;"}, // leftwards double arrow, U+21D0 ISOtech
{u"larr"_qs, u"&#8592;"_qs}, // leftwards arrow, U+2190 ISOnum
{u"uarr"_qs, u"&#8593;"_qs}, // upwards arrow, U+2191 ISOnum
{u"rarr"_qs, u"&#8594;"_qs}, // rightwards arrow, U+2192 ISOnum
{u"darr"_qs, u"&#8595;"_qs}, // downwards arrow, U+2193 ISOnum
{u"harr"_qs, u"&#8596;"_qs}, // left right arrow, U+2194 ISOamsa
{u"crarr"_qs, u"&#8629;"_qs}, // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
{u"lArr"_qs, u"&#8656;"_qs}, // leftwards double arrow, U+21D0 ISOtech
// Unicode does not say that lArr is the same as the 'is implied by' arrow
// but also does not have any other character for that function. So lArr can
// be used for 'is implied by' as ISOtech suggests
{"uArr", "&#8657;"}, // upwards double arrow, U+21D1 ISOamsa
{"rArr", "&#8658;"}, // rightwards double arrow, U+21D2 ISOtech
{u"uArr"_qs, u"&#8657;"_qs}, // upwards double arrow, U+21D1 ISOamsa
{u"rArr"_qs, u"&#8658;"_qs}, // rightwards double arrow, U+21D2 ISOtech
// Unicode does not say this is the 'implies' character but does not have
// another character with this function so rArr can be used for 'implies'
// as ISOtech suggests
{"dArr", "&#8659;"}, // downwards double arrow, U+21D3 ISOamsa
{"hArr", "&#8660;"}, // left right double arrow, U+21D4 ISOamsa
{u"dArr"_qs, u"&#8659;"_qs}, // downwards double arrow, U+21D3 ISOamsa
{u"hArr"_qs, u"&#8660;"_qs}, // left right double arrow, U+21D4 ISOamsa
// Mathematical Operators
{"forall", "&#8704;"}, // for all, U+2200 ISOtech
{"part", "&#8706;"}, // partial differential, U+2202 ISOtech
{"exist", "&#8707;"}, // there exists, U+2203 ISOtech
{"empty", "&#8709;"}, // empty set = null set, U+2205 ISOamso
{"nabla", "&#8711;"}, // nabla = backward difference, U+2207 ISOtech
{"isin", "&#8712;"}, // element of, U+2208 ISOtech
{"notin", "&#8713;"}, // not an element of, U+2209 ISOtech
{"ni", "&#8715;"}, // contains as member, U+220B ISOtech
{"prod", "&#8719;"}, // n-ary product = product sign, U+220F ISOamsb
{u"forall"_qs, u"&#8704;"_qs}, // for all, U+2200 ISOtech
{u"part"_qs, u"&#8706;"_qs}, // partial differential, U+2202 ISOtech
{u"exist"_qs, u"&#8707;"_qs}, // there exists, U+2203 ISOtech
{u"empty"_qs, u"&#8709;"_qs}, // empty set = null set, U+2205 ISOamso
{u"nabla"_qs, u"&#8711;"_qs}, // nabla = backward difference, U+2207 ISOtech
{u"isin"_qs, u"&#8712;"_qs}, // element of, U+2208 ISOtech
{u"notin"_qs, u"&#8713;"_qs}, // not an element of, U+2209 ISOtech
{u"ni"_qs, u"&#8715;"_qs}, // contains as member, U+220B ISOtech
{u"prod"_qs, u"&#8719;"_qs}, // n-ary product = product sign, U+220F ISOamsb
// prod is NOT the same character as U+03A0 'greek capital letter pi' though
// the same glyph might be used for both
{"sum", "&#8721;"}, // n-ary summation, U+2211 ISOamsb
{u"sum"_qs, u"&#8721;"_qs}, // n-ary summation, U+2211 ISOamsb
// sum is NOT the same character as U+03A3 'greek capital letter sigma'
// though the same glyph might be used for both
{"minus", "&#8722;"}, // minus sign, U+2212 ISOtech
{"lowast", "&#8727;"}, // asterisk operator, U+2217 ISOtech
{"radic", "&#8730;"}, // square root = radical sign, U+221A ISOtech
{"prop", "&#8733;"}, // proportional to, U+221D ISOtech
{"infin", "&#8734;"}, // infinity, U+221E ISOtech
{"ang", "&#8736;"}, // angle, U+2220 ISOamso
{"and", "&#8743;"}, // logical and = wedge, U+2227 ISOtech
{"or", "&#8744;"}, // logical or = vee, U+2228 ISOtech
{"cap", "&#8745;"}, // intersection = cap, U+2229 ISOtech
{"cup", "&#8746;"}, // union = cup, U+222A ISOtech
{"int", "&#8747;"}, // integral, U+222B ISOtech
{"there4", "&#8756;"}, // therefore, U+2234 ISOtech
{"sim", "&#8764;"}, // tilde operator = varies with = similar to, U+223C ISOtech
{u"minus"_qs, u"&#8722;"_qs}, // minus sign, U+2212 ISOtech
{u"lowast"_qs, u"&#8727;"_qs}, // asterisk operator, U+2217 ISOtech
{u"radic"_qs, u"&#8730;"_qs}, // square root = radical sign, U+221A ISOtech
{u"prop"_qs, u"&#8733;"_qs}, // proportional to, U+221D ISOtech
{u"infin"_qs, u"&#8734;"_qs}, // infinity, U+221E ISOtech
{u"ang"_qs, u"&#8736;"_qs}, // angle, U+2220 ISOamso
{u"and"_qs, u"&#8743;"_qs}, // logical and = wedge, U+2227 ISOtech
{u"or"_qs, u"&#8744;"_qs}, // logical or = vee, U+2228 ISOtech
{u"cap"_qs, u"&#8745;"_qs}, // intersection = cap, U+2229 ISOtech
{u"cup"_qs, u"&#8746;"_qs}, // union = cup, U+222A ISOtech
{u"int"_qs, u"&#8747;"_qs}, // integral, U+222B ISOtech
{u"there4"_qs, u"&#8756;"_qs}, // therefore, U+2234 ISOtech
{u"sim"_qs, u"&#8764;"_qs}, // tilde operator = varies with = similar to, U+223C ISOtech
// tilde operator is NOT the same character as the tilde, U+007E,
// although the same glyph might be used to represent both
{"cong", "&#8773;"}, // approximately equal to, U+2245 ISOtech
{"asymp", "&#8776;"}, // almost equal to = asymptotic to, U+2248 ISOamsr
{"ne", "&#8800;"}, // not equal to, U+2260 ISOtech
{"equiv", "&#8801;"}, // identical to, U+2261 ISOtech
{"le", "&#8804;"}, // less-than or equal to, U+2264 ISOtech
{"ge", "&#8805;"}, // greater-than or equal to, U+2265 ISOtech
{"sub", "&#8834;"}, // subset of, U+2282 ISOtech
{"sup", "&#8835;"}, // superset of, U+2283 ISOtech
{"nsub", "&#8836;"}, // not a subset of, U+2284 ISOamsn
{"sube", "&#8838;"}, // subset of or equal to, U+2286 ISOtech
{"supe", "&#8839;"}, // superset of or equal to, U+2287 ISOtech
{"oplus", "&#8853;"}, // circled plus = direct sum, U+2295 ISOamsb
{"otimes", "&#8855;"}, // circled times = vector product, U+2297 ISOamsb
{"perp", "&#8869;"}, // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
{"sdot", "&#8901;"}, // dot operator, U+22C5 ISOamsb
{u"cong"_qs, u"&#8773;"_qs}, // approximately equal to, U+2245 ISOtech
{u"asymp"_qs, u"&#8776;"_qs}, // almost equal to = asymptotic to, U+2248 ISOamsr
{u"ne"_qs, u"&#8800;"_qs}, // not equal to, U+2260 ISOtech
{u"equiv"_qs, u"&#8801;"_qs}, // identical to, U+2261 ISOtech
{u"le"_qs, u"&#8804;"_qs}, // less-than or equal to, U+2264 ISOtech
{u"ge"_qs, u"&#8805;"_qs}, // greater-than or equal to, U+2265 ISOtech
{u"sub"_qs, u"&#8834;"_qs}, // subset of, U+2282 ISOtech
{u"sup"_qs, u"&#8835;"_qs}, // superset of, U+2283 ISOtech
{u"nsub"_qs, u"&#8836;"_qs}, // not a subset of, U+2284 ISOamsn
{u"sube"_qs, u"&#8838;"_qs}, // subset of or equal to, U+2286 ISOtech
{u"supe"_qs, u"&#8839;"_qs}, // superset of or equal to, U+2287 ISOtech
{u"oplus"_qs, u"&#8853;"_qs}, // circled plus = direct sum, U+2295 ISOamsb
{u"otimes"_qs, u"&#8855;"_qs}, // circled times = vector product, U+2297 ISOamsb
{u"perp"_qs, u"&#8869;"_qs}, // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
{u"sdot"_qs, u"&#8901;"_qs}, // dot operator, U+22C5 ISOamsb
// dot operator is NOT the same character as U+00B7 middle dot
// Miscellaneous Technical
{"lceil", "&#8968;"}, // left ceiling = APL upstile, U+2308 ISOamsc
{"rceil", "&#8969;"}, // right ceiling, U+2309 ISOamsc
{"lfloor", "&#8970;"}, // left floor = APL downstile, U+230A ISOamsc
{"rfloor", "&#8971;"}, // right floor, U+230B ISOamsc
{"lang", "&#9001;"}, // left-pointing angle bracket = bra, U+2329 ISOtech
{u"lceil"_qs, u"&#8968;"_qs}, // left ceiling = APL upstile, U+2308 ISOamsc
{u"rceil"_qs, u"&#8969;"_qs}, // right ceiling, U+2309 ISOamsc
{u"lfloor"_qs, u"&#8970;"_qs}, // left floor = APL downstile, U+230A ISOamsc
{u"rfloor"_qs, u"&#8971;"_qs}, // right floor, U+230B ISOamsc
{u"lang"_qs, u"&#9001;"_qs}, // left-pointing angle bracket = bra, U+2329 ISOtech
// lang is NOT the same character as U+003C 'less than sign'
// or U+2039 'single left-pointing angle quotation mark'
{"rang", "&#9002;"}, // right-pointing angle bracket = ket, U+232A ISOtech
{u"rang"_qs, u"&#9002;"_qs}, // right-pointing angle bracket = ket, U+232A ISOtech
// rang is NOT the same character as U+003E 'greater than sign'
// or U+203A 'single right-pointing angle quotation mark'
// Geometric Shapes
{"loz", "&#9674;"}, // lozenge, U+25CA ISOpub
{u"loz"_qs, u"&#9674;"_qs}, // lozenge, U+25CA ISOpub
// Miscellaneous Symbols
{"spades", "&#9824;"}, // black spade suit, U+2660 ISOpub
{"clubs", "&#9827;"}, // black club suit = shamrock, U+2663 ISOpub
{"hearts", "&#9829;"}, // black heart suit = valentine, U+2665 ISOpub
{"diams", "&#9830;"} // black diamond suit, U+2666 ISOpub
{u"spades"_qs, u"&#9824;"_qs}, // black spade suit, U+2660 ISOpub
{u"clubs"_qs, u"&#9827;"_qs}, // black club suit = shamrock, U+2663 ISOpub
{u"hearts"_qs, u"&#9829;"_qs}, // black heart suit = valentine, U+2665 ISOpub
{u"diams"_qs, u"&#9830;"_qs} // black diamond suit, U+2666 ISOpub
};
return HTMLEntities.value(name);
}
@@ -360,23 +361,23 @@ namespace
// Ported to Qt from KDElibs4
QDateTime parseDate(const QString &string)
{
const char shortDay[][4] =
const char16_t shortDay[][4] =
{
"Mon", "Tue", "Wed",
"Thu", "Fri", "Sat",
"Sun"
u"Mon", u"Tue", u"Wed",
u"Thu", u"Fri", u"Sat",
u"Sun"
};
const char longDay[][10] =
const char16_t longDay[][10] =
{
"Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday",
"Sunday"
u"Monday", u"Tuesday", u"Wednesday",
u"Thursday", u"Friday", u"Saturday",
u"Sunday"
};
const char shortMonth[][4] =
const char16_t shortMonth[][4] =
{
"Jan", "Feb", "Mar", "Apr",
"May", "Jun", "Jul", "Aug",
"Sep", "Oct", "Nov", "Dec"
u"Jan", u"Feb", u"Mar", u"Apr",
u"May", u"Jun", u"Jul", u"Aug",
u"Sep", u"Oct", u"Nov", u"Dec"
};
const QString str = string.trimmed();
@@ -391,7 +392,7 @@ namespace
int nmin = 8;
int nsec = 9;
// Also accept obsolete form "Weekday, DD-Mon-YY HH:MM:SS ±hhmm"
QRegularExpression rx {"^(?:([A-Z][a-z]+),\\s*)?(\\d{1,2})(\\s+|-)([^-\\s]+)(\\s+|-)(\\d{2,4})\\s+(\\d\\d):(\\d\\d)(?::(\\d\\d))?\\s+(\\S+)$"};
QRegularExpression rx {u"^(?:([A-Z][a-z]+),\\s*)?(\\d{1,2})(\\s+|-)([^-\\s]+)(\\s+|-)(\\d{2,4})\\s+(\\d\\d):(\\d\\d)(?::(\\d\\d))?\\s+(\\S+)$"_qs};
QRegularExpressionMatch rxMatch;
QStringList parts;
if (str.indexOf(rx, 0, &rxMatch) == 0)
@@ -406,7 +407,7 @@ namespace
else
{
// Check for the obsolete form "Wdy Mon DD HH:MM:SS YYYY"
rx = QRegularExpression {"^([A-Z][a-z]+)\\s+(\\S+)\\s+(\\d\\d)\\s+(\\d\\d):(\\d\\d):(\\d\\d)\\s+(\\d\\d\\d\\d)$"};
rx = QRegularExpression {u"^([A-Z][a-z]+)\\s+(\\S+)\\s+(\\d\\d)\\s+(\\d\\d):(\\d\\d):(\\d\\d)\\s+(\\d\\d\\d\\d)$"_qs};
if (str.indexOf(rx, 0, &rxMatch) != 0)
return QDateTime::currentDateTime();
@@ -465,7 +466,7 @@ namespace
bool negOffset = false;
if (parts.count() > 10)
{
rx = QRegularExpression {"^([+-])(\\d\\d)(\\d\\d)$"};
rx = QRegularExpression {u"^([+-])(\\d\\d)(\\d\\d)$"_qs};
if (parts[10].indexOf(rx, 0, &rxMatch) == 0)
{
// It's a UTC offset ±hhmm
@@ -628,9 +629,9 @@ void Parser::parseRssArticle(QXmlStreamReader &xml)
}
else if (name == QLatin1String("enclosure"))
{
if (xml.attributes().value("type") == QLatin1String("application/x-bittorrent"))
if (xml.attributes().value(u"type"_qs) == QLatin1String("application/x-bittorrent"))
article[Article::KeyTorrentURL] = xml.attributes().value(QLatin1String("url")).toString();
else if (xml.attributes().value("type").isEmpty())
else if (xml.attributes().value(u"type"_qs).isEmpty())
altTorrentUrl = xml.attributes().value(QLatin1String("url")).toString();
}
else if (name == QLatin1String("link"))
@@ -786,7 +787,7 @@ void Parser::parseAtomArticle(QXmlStreamReader &xml)
void Parser::parseAtomChannel(QXmlStreamReader &xml)
{
m_baseUrl = xml.attributes().value("xml:base").toString();
m_baseUrl = xml.attributes().value(u"xml:base"_qs).toString();
while (!xml.atEnd())
{