]> mj.ucw.cz Git - ucwmac.git/commitdiff
Reformed typesetting of URLs
authorMartin Mares <mj@ucw.cz>
Tue, 20 Aug 2019 18:56:45 +0000 (20:56 +0200)
committerMartin Mares <mj@ucw.cz>
Tue, 20 Aug 2019 18:56:45 +0000 (20:56 +0200)
It was a big obstacle that URLs do not work properly in nested
contexts and that special characters are sometimes broken in PDF links.

test-ref.tex
ucw-link.tex
ucwmac2.tex

index a503c76166da4696b0abc668261fab973bdd7412..f1c70415e83b88e076db8f49eb95a24b641c411d 100644 (file)
@@ -27,9 +27,9 @@
 
 \chapter[sec]{Caput secunda}
 
-\url{http://pruvodce.ucw.cz/errata%21.html}
+\url{http://pruvodce.ucw.cz/err_ata\\%21.html?a=b&c=d#x}
 
-\linkurl{http://pruvodce.ucw.cz/}{See the web page.}
+\linkurl{http://pruvodce.ucw.cz/}{See the web page \displayurl.}
 
 \chapter[ter]{Caput tertia}
 
index 58a1c67c9253c91e55406728a956611119cabaf5..0eef8f84383b7a5f9b0f68524ad214629540e984 100644 (file)
 % \linkpage{page}{text}
 \def\linkpage#1#2{\ifclickable\pdfstartlink\commonlinkargs goto page #1 {/Fit}\relax #2\pdfendlink\else #2\fi}
 
+% Typesetting of URLs is tricky:
+%
+%   - They can contain various characters considered special by TeX.
+%   - We want to adjust appearance of "//", "_", "~" according to font.
+%   - We want to insert a breakpoint after "/", "?", "&".
+%   - We need the raw form of the URL for PDF links.
+%   - We cannot rely purely on changing catcodes, as we sometimes need
+%     to parse URLs given as arguments of macros.
+%   - Sometimes, it is useful to insert a manual line break into the URL.
+%
+% Therefore:
+%
+%   - In our front-end macros (\url, \linkurl) we switch catcodes
+%     to accept '%' and '#' as normal characters; if the macros are
+%     called indirectly, these characters must be escaped as '\%' and '\#'.
+%   - The URL is preprocessed: special characters (with their original
+%     catcode) are replaced by calls of auxiliary macros.
+%   - When producing PDF links, the auxiliary macros expand to ordinary
+%     ASCII characters.
+%   - When typesetting the URL, the auxiliary macros expand differently.
+%     Furthermore, they can be temporarily re-defined in the \urlprefix macro.
+%   - "\\" (which is usually called to produce a line break) disappears in PDF links.
+%   - If you call a custom macro in the URL, you can modify its definition
+%     for typesetting in \urlprefix and for PDF links by appending to \urlplainascii.
+
 % Typeset a clickable URL
 % \url{http://example.com/}
-\def\url{\begingroup\allowurlchars\urlaux}
-\def\urlaux#1{\linkurlaux{#1}{\displayurl #1^^X}}
+\def\url{\begingroup\begingroup\allowurlchars\urlaux}
+\def\urlaux#1{\urlauxarg{#1}\linkurlauxB{\displayurl}}
 
 % Typeset a clickable link to the given URL
 % \linkurl{http://example.com/}{text}
-\def\linkurl{\begingroup\allowurlchars\linkurlaux}
-\def\linkurlaux#1#2{\endgroup
+\def\linkurl{\begingroup\begingroup\allowurlchars\linkurlaux}
+\def\linkurlaux#1{\urlauxarg{#1}\linkurlauxB}
+\def\linkurlauxB#1{%
        \leavevmode
        \ifclickable
-               \pdfstartlink\commonlinkargs user {/Subtype/Link /A << /Type/Action /S/URI /URI(#1) >>}\relax
+               {%
+                       \urlplainascii
+                       \pdfstartlink\commonlinkargs user {/Subtype/Link /A << /Type/Action /S/URI /URI(\tmpb) >>}\relax
+               }%
        \fi
-       #2%
+       #1%
        \ifclickable
                \pdfendlink
        \fi
+       \endgroup               % opened in \url or \linkurl
 }
 
-% Catcode each special character valid in URL to 'other'
-\def\allowurlchars{\catcode`\#=12\catcode`\_=12\catcode`\%=12\catcode`\&=12\catcode`\$=12\catcode`\~=12\relax}
+% Catcode '%' and '#' to 'other'
+\def\allowurlchars{\catcode`\%=12\catcode`\#=12\relax}
+
+\def\urlauxarg#1{% #1 = the URL as read by \url or \linkurl; the preprocessed result is left in \tmpb
+       \endgroup               % opened in \url or \linkurl (the inner of the two groups, so the catcodes set by \allowurlchars are restored here)
+       \toks0={#1}\edef\tmpb{\the\toks0}% copy the URL to \tmpb without expanding its tokens
+       \replacestrings{//}{\urlslashslash}% has to precede the replacement of a single "/" below
+       \replacestrings{_}{\urlunderscore}%
+       \replacestrings{~}{\urltilde}%
+       \replacestrings{/}{\urlslash}% only slashes which were not a part of "//" remain at this point
+       \replacestrings{?}{\urlquestion}%
+       \replacestrings{&}{\urlamp}%
+       \replacestrings{=}{\urlequal}% each \replacestrings call rewrites \tmpb in place
+}
 
-% Style switches and the beginning/end of an URL
+% Style switches at the beginning/end of an URL (feel free to re-define them)
 \let\urlprefix\it
 \let\urlsuffix\/
 
+% Default appearance of characters special in URLs
+\def\urlslashslash{/\kern\urlinterslashkern/}
+\def\urlunderscore{\_}
+\def\urltilde{{\tt\char126}}
+\def\urlslash{/\penalty100\relax}
+\def\urlquestion{?\penalty100\relax}
+\def\urlamp{\&\penalty100\relax}
+\def\urlequal{=\penalty100\relax}
+
 % Kern to place between "//" in an URL
 \newdimen\urlinterslashkern
 \urlinterslashkern=-0.1em
 
-% Internal macro for typesetting of URLs
-\def\displayurl#1:#2#3^^X{{\urlprefix #1:#2\ifx#2/\kern\urlinterslashkern\fi#3\urlsuffix}}
+% Switch auxiliary macros, so special characters expand to plain ASCII characters
+% (since we need to replace them in the expand processor, we cannot use \let for that)
+% If you want to modify expansion of your macros, extend \urlplainascii using \appendef.
+{
+\lccode`A=`\_
+\lccode`B=`\~
+\lccode`C=`\&
+\lccode`D=`\%
+\lccode`E=`\#
+\lowercase{\gdef\urlplainascii{% \lowercase maps the letters A-E in the body to the characters set by \lccode above; \gdef makes the definition survive the enclosing group
+       \def\urlslashslash{//}%
+       \def\urlunderscore{A}% A is turned into "_" by \lowercase
+       \def\urltilde{B}% B is turned into "~"
+       \def\urlslash{/}%
+       \def\urlquestion{?}%
+       \def\urlamp{C}% C is turned into "&"
+       \def\urlequal{=}%
+       \def\%{D}% D is turned into a percent sign, so an escaped percent sign yields the plain character
+       \def\#{E}% E is turned into a hash sign
+       \def\\{}% a manual line break produces nothing in the PDF link
+}}}
+
+% Typeset the URL stored in \tmpb. In most cases, this is used internally by \url,
+% but you can call it explicitly from the second argument of \linkurl to typeset the current URL.
+\def\displayurl{{\urlprefix\tmpb\urlsuffix}}
index d0173694f5e7ebbd30b502aac077144e2edc84b9..1c29317c8073e402da2f2bca4b07597d72a03f2d 100644 (file)
 
 \def\ucwwarn#1{\immediate\write16{*** UCWmac warning: #1 ***}}
 
+% Replace all occurrences of #1 in \tmpb by #2.
+% Thanks to Petr Olsak's OPmac for an efficient implementation.
+\bgroup \catcode`!=3 \catcode`?=3
+\gdef\replacestrings#1#2{\long\def\replacestringsA##1#1{\def\tmpb{##1}\replacestringsB}% A: the text before the first #1 becomes the new \tmpb, then B takes over
+   \long\def\replacestringsB##1#1{\ifx!##1\relax \else\appendef\tmpb{#2##1}% B: a chunk equal to the "!" sentinel ends the loop; any other chunk is appended to \tmpb preceded by #2 (\appendef is defined outside this hunk)
+      \expandafter\replacestringsB\fi}% \expandafter removes \fi before B reads the next chunk
+   \expandafter\replacestringsA\tmpb?#1!#1% run A on the contents of \tmpb followed by sentinels: "?" marks the end of the text, "!" between two copies of #1 stops B
+   \long\def\replacestringsA##1?{\def\tmpb{##1}}\expandafter\replacestringsA\tmpb
+}
+\egroup
+
 %%% Page size and margins %%%
 
 % If you modify these registers, call \setuppage afterwards