From: Martin Mares Date: Tue, 20 Aug 2019 18:56:45 +0000 (+0200) Subject: Reformed typesetting of URLs X-Git-Tag: v2.0~4 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=3ce42872f9e17816028fcbea12a9bf6e9fe675e2;p=ucwmac.git Reformed typesetting of URLs It was a big obstacle that URLs do not work properly in nested contexts and that special characters are sometimes broken in PDF links. --- diff --git a/test-ref.tex b/test-ref.tex index a503c76..f1c7041 100644 --- a/test-ref.tex +++ b/test-ref.tex @@ -27,9 +27,9 @@ \chapter[sec]{Caput secunda} -\url{http://pruvodce.ucw.cz/errata%21.html} +\url{http://pruvodce.ucw.cz/err_ata\\%21.html?a=b&c=d#x} -\linkurl{http://pruvodce.ucw.cz/}{See the web page.} +\linkurl{http://pruvodce.ucw.cz/}{See the web page \displayurl.} \chapter[ter]{Caput tertia} diff --git a/ucw-link.tex b/ucw-link.tex index 58a1c67..0eef8f8 100644 --- a/ucw-link.tex +++ b/ucw-link.tex @@ -40,35 +40,109 @@ % \linkpage{page}{text} \def\linkpage#1#2{\ifclickable\pdfstartlink\commonlinkargs goto page #1 {/Fit}\relax #2\pdfendlink\else #2\fi} +% Typesetting of URLs is tricky: +% +% - They can contain various characters considered special by TeX. +% - We want to adjust appearance of "//", "_", "~" according to font. +% - We want to insert a breakpoint after "/", "?", "&". +% - We need the raw form of the URL for PDF links. +% - We cannot rely purely on changing catcodes, as we sometimes need +% to parse URLs given as arguments of macros. +% - Sometimes, it is useful to insert a manual line break to the URL. +% +% Therefore: +% +% - In our front-end macros (\url, \linkurl) we switch catcodes +% to accept '%' and '#' as normal characters; if the macros are +% called indirectly, these characters must be escaped as '\%' and '\#'. +% - The URL is preprocessed: special characters (with their original +% catcode) are replaced by calls of auxiliary macros. +% - When producing PDF links, the auxiliary macros expand to ordinary +% ASCII characters. 
+% - When typesetting the URL, the auxiliary macros expand differently. +% Furthermore, they can be temporarily re-defined in the \urlprefix macro. +% - "\\" (which is usually called to produce a line break) disappears in PDF links. +% - If you call a custom macro in the URL, you can modify its definition +% for typesetting in \urlprefix and for PDF links by appending to \urlplainascii. + % Typeset a clickable URL % \url{http://example.com/} -\def\url{\begingroup\allowurlchars\urlaux} -\def\urlaux#1{\linkurlaux{#1}{\displayurl #1^^X}} +\def\url{\begingroup\begingroup\allowurlchars\urlaux} +\def\urlaux#1{\urlauxarg{#1}\linkurlauxB{\displayurl}} % Typeset a clickable link to the given URL % \linkurl{http://example.com/}{text} -\def\linkurl{\begingroup\allowurlchars\linkurlaux} -\def\linkurlaux#1#2{\endgroup +\def\linkurl{\begingroup\begingroup\allowurlchars\linkurlaux} +\def\linkurlaux#1{\urlauxarg{#1}\linkurlauxB} +\def\linkurlauxB#1{% \leavevmode \ifclickable - \pdfstartlink\commonlinkargs user {/Subtype/Link /A << /Type/Action /S/URI /URI(#1) >>}\relax + {% + \urlplainascii + \pdfstartlink\commonlinkargs user {/Subtype/Link /A << /Type/Action /S/URI /URI(\tmpb) >>}\relax + }% \fi - #2% + #1% \ifclickable \pdfendlink \fi + \endgroup % opened in \url or \linkurl } -% Catcode each special character valid in URL to 'other' -\def\allowurlchars{\catcode`\#=12\catcode`\_=12\catcode`\%=12\catcode`\&=12\catcode`\$=12\catcode`\~=12\relax} +% Catcode '%' and '#' to 'other' +\def\allowurlchars{\catcode`\%=12\catcode`\#=12\relax} + +\def\urlauxarg#1{% + \endgroup % opened in \url or \linkurl + \toks0={#1}\edef\tmpb{\the\toks0}% + \replacestrings{//}{\urlslashslash}% + \replacestrings{_}{\urlunderscore}% + \replacestrings{~}{\urltilde}% + \replacestrings{/}{\urlslash}% + \replacestrings{?}{\urlquestion}% + \replacestrings{&}{\urlamp}% + \replacestrings{=}{\urlequal}% +} -% Style switches and the beginning/end of an URL +% Style switches at the beginning/end of an URL (feel free 
to re-define them) \let\urlprefix\it \let\urlsuffix\/ +% Default appearance of characters special in URLs +\def\urlslashslash{/\kern\urlinterslashkern/} +\def\urlunderscore{\_} +\def\urltilde{{\tt\char126}} +\def\urlslash{/\penalty100\relax} +\def\urlquestion{?\penalty100\relax} +\def\urlamp{\&\penalty100\relax} +\def\urlequal{=\penalty100\relax} + % Kern to place between "//" in an URL \newdimen\urlinterslashkern \urlinterslashkern=-0.1em -% Internal macro for typesetting of URLs -\def\displayurl#1:#2#3^^X{{\urlprefix #1:#2\ifx#2/\kern\urlinterslashkern\fi#3\urlsuffix}} +% Switch auxiliary macros, so special characters expand to plain ASCII characters +% (since we need to replace them in the expand processor, we cannot use \let for that) +% If you want to modify expansion of your macros, extend \urlplainascii using \appendef. +{ +\lccode`A=`\_ +\lccode`B=`\~ +\lccode`C=`\& +\lccode`D=`\% +\lccode`E=`\# +\lowercase{\gdef\urlplainascii{% + \def\urlslashslash{//}% + \def\urlunderscore{A}% + \def\urltilde{B}% + \def\urlslash{/}% + \def\urlquestion{?}% + \def\urlamp{C}% + \def\urlequal{=}% + \def\%{D}% + \def\#{E}% + \def\\{}% +}}} + +% Typeset the URL stored in \tmpb. In most cases, this is used internally by \url, +% but you can call it explicitly from the second argument of \linkurl to typeset the current URL. +\def\displayurl{{\urlprefix\tmpb\urlsuffix}} diff --git a/ucwmac2.tex b/ucwmac2.tex index d017369..1c29317 100644 --- a/ucwmac2.tex +++ b/ucwmac2.tex @@ -61,6 +61,17 @@ \def\ucwwarn#1{\immediate\write16{*** UCWmac warning: #1 ***}} +% Replace all occurrences of #1 in \tmpb by #2. +% Thanks to Petr Olsak's OPmac for an efficient implementation. 
+\bgroup \catcode`!=3 \catcode`?=3 +\gdef\replacestrings#1#2{\long\def\replacestringsA##1#1{\def\tmpb{##1}\replacestringsB}% + \long\def\replacestringsB##1#1{\ifx!##1\relax \else\appendef\tmpb{#2##1}% + \expandafter\replacestringsB\fi}% + \expandafter\replacestringsA\tmpb?#1!#1% + \long\def\replacestringsA##1?{\def\tmpb{##1}}\expandafter\replacestringsA\tmpb +} +\egroup + %%% Page size and margins %%% % If you modify these registers, call \setuppage afterwards