mirror of
https://github.com/AntennaPod/AntennaPod.git
synced 2025-10-29 11:49:33 +00:00
Highlight plain-text links in episode description (#7581)
This commit is contained in:
parent
55d3b743d1
commit
85313e28b6
@ -0,0 +1,145 @@
|
||||
package de.danoeh.antennapod.ui.cleaner;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.nodes.Node;
|
||||
import org.jsoup.nodes.TextNode;
|
||||
import org.jsoup.select.NodeTraversor;
|
||||
import org.jsoup.select.NodeVisitor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class PlainTextLinksConverter {
|
||||
private static final Pattern HTTP_LINK_REGEX = Pattern.compile(
|
||||
"(?:https?://(?:www\\.)?|www\\.)" // http(s)://[www.] OR www.
|
||||
+ "[-a-zA-Z0-9@:%._+~#=]{1,256}" // Domain name
|
||||
+ "\\.[a-zA-Z]{2,6}\\b" // Top-level domain
|
||||
+ "[-a-zA-Z0-9@:%_+.*~#?!&$/=()\\[\\],;]*", // Path, query params
|
||||
Pattern.CASE_INSENSITIVE
|
||||
);
|
||||
protected static final List<String> NOT_ALLOWED_END_CHARS = List.of(
|
||||
".", ",", ";", ":", "?", "!", ")", "(", "[", "]", "-", "_", "~", "#", "@", "$", "*", "+");
|
||||
|
||||
private static final String STARTS_WITH_HTTP = "(?i)https?://.*";
|
||||
private static final String ANCHOR_TAG = "a";
|
||||
private static final String ANCHOR_ADDRESS = "href";
|
||||
|
||||
/**
|
||||
* Provided text can be an HTML document or plain text.
|
||||
* It may contain a mixture of plain-text links and HTML links.
|
||||
* Only plain-text links will be converted to HTML {@code <a>} tags.
|
||||
*/
|
||||
public static String convertLinksToHtml(String text) {
|
||||
if (text == null || text.isEmpty()) {
|
||||
return text;
|
||||
}
|
||||
try {
|
||||
Document doc = Jsoup.parse(text);
|
||||
convertLinksToHtml(doc);
|
||||
return doc.body().html();
|
||||
} catch (Exception e) {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
public static void convertLinksToHtml(Document doc) {
|
||||
if (doc == null) {
|
||||
return;
|
||||
}
|
||||
NodeTraversor.traverse(new LinkConvertingVisitor(), doc.body());
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures that URLs are only converted if they are not already part of an existing anchor tag.
|
||||
* Document structure remains untouched, logic affects only {@link TextNode} - leaf element with no tags in it.
|
||||
* One {@link TextNode} is replaced with multiple {@link Element}s:
|
||||
* <li>{@link TextNode} with text before the link</li>
|
||||
* <li>{@link Element} with the link tag</li>
|
||||
* <li>{@link TextNode} with text after the link</li>
|
||||
*/
|
||||
private static class LinkConvertingVisitor implements NodeVisitor {
|
||||
@Override
|
||||
public void head(@NonNull Node node, int depth) {
|
||||
if (!(node instanceof TextNode textNode)) {
|
||||
return;
|
||||
} else if (isInsideAnchor(textNode)) {
|
||||
return;
|
||||
}
|
||||
String originalText = textNode.getWholeText();
|
||||
Matcher matcher = HTTP_LINK_REGEX.matcher(originalText);
|
||||
|
||||
if (!matcher.find()) {
|
||||
return;
|
||||
}
|
||||
List<Node> newNodes = new ArrayList<>();
|
||||
int lastEnd = 0;
|
||||
matcher.reset();
|
||||
|
||||
while (matcher.find()) {
|
||||
String url = matcher.group();
|
||||
if (endsWithPunctuation(url)) {
|
||||
continue;
|
||||
}
|
||||
if (matcher.start() > lastEnd) {
|
||||
newNodes.add(new TextNode(originalText.substring(lastEnd, matcher.start())));
|
||||
}
|
||||
newNodes.add(link(url));
|
||||
lastEnd = matcher.end();
|
||||
}
|
||||
|
||||
if (lastEnd < originalText.length()) {
|
||||
newNodes.add(new TextNode(originalText.substring(lastEnd)));
|
||||
}
|
||||
|
||||
if (!newNodes.isEmpty()) {
|
||||
Node parent = textNode.parent();
|
||||
if (parent instanceof Element parentElement) {
|
||||
int index = textNode.siblingIndex();
|
||||
textNode.remove();
|
||||
parentElement.insertChildren(index, newNodes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Element link(String detectedUrl) {
|
||||
var url = detectedUrl;
|
||||
if (!detectedUrl.matches(STARTS_WITH_HTTP)) {
|
||||
url = "https://" + url;
|
||||
}
|
||||
return new Element(ANCHOR_TAG).attr(ANCHOR_ADDRESS, url).text(detectedUrl);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tail(@NonNull Node node, int depth) {
|
||||
//not needed
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isInsideAnchor(Node node) {
|
||||
Node current = node;
|
||||
while (current != null) {
|
||||
if (current instanceof Element currentElement) {
|
||||
if (ANCHOR_TAG.equalsIgnoreCase(currentElement.tagName())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
current = current.parent();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private static boolean endsWithPunctuation(String url) {
|
||||
for (String endChar : NOT_ALLOWED_END_CHARS) {
|
||||
if (url.endsWith(endChar)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -76,6 +76,7 @@ public class ShownotesCleaner {
|
||||
|
||||
/**
|
||||
* Applies an app-specific CSS stylesheet and adds timecode links (optional).
|
||||
* Also converts plain-text links to HTML links.
|
||||
* <p/>
|
||||
* This method does NOT change the original shownotes string of the shownotesProvider object and it should
|
||||
* also not be changed by the caller.
|
||||
@ -99,6 +100,7 @@ public class ShownotesCleaner {
|
||||
Document document = Jsoup.parse(shownotes);
|
||||
cleanCss(document);
|
||||
document.head().appendElement("style").attr("type", "text/css").text(webviewStyle);
|
||||
PlainTextLinksConverter.convertLinksToHtml(document);
|
||||
addTimecodes(document);
|
||||
document.body().attr("dir", "auto");
|
||||
return document.toString();
|
||||
|
||||
@ -0,0 +1,154 @@
|
||||
package de.danoeh.antennapod.ui.cleaner;
|
||||
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static de.danoeh.antennapod.ui.cleaner.PlainTextLinksConverter.NOT_ALLOWED_END_CHARS;
|
||||
import org.junit.Test;
|
||||
import java.util.List;
|
||||
|
||||
public class PlainTextLinksConverterTest {
|
||||
|
||||
@Test
|
||||
public void testConvertPlainTextLinksToHtml() {
|
||||
final String link1 = "https://url.to/link";
|
||||
final String textWithLink = "text " + link1;
|
||||
assertEquals("text " + makeLinkHtml(link1), PlainTextLinksConverter.convertLinksToHtml(textWithLink));
|
||||
|
||||
final String link2 = "https://t.me/link";
|
||||
final String textWithLink2 = "text " + link2;
|
||||
assertEquals("text " + makeLinkHtml(link2), PlainTextLinksConverter.convertLinksToHtml(textWithLink2));
|
||||
|
||||
final String text = "artist here: www.example.com";
|
||||
final String expected = "artist here: <a href=\"https://www.example.com\">www.example.com</a>";
|
||||
assertEquals(expected, PlainTextLinksConverter.convertLinksToHtml(text));
|
||||
|
||||
final String textWithTwoLinks = "text " + link1 + " and " + link2;
|
||||
final String expectedTwoLinks = "text " + makeLinkHtml(link1) + " and " + makeLinkHtml(link2);
|
||||
assertEquals(expectedTwoLinks, PlainTextLinksConverter.convertLinksToHtml(textWithTwoLinks));
|
||||
|
||||
final String textWithMixturePlainTextAndHtml = "text " + link1 + " and " + makeLinkHtml(link2);
|
||||
final String expectedMixture = "text " + makeLinkHtml(link1) + " and " + makeLinkHtml(link2);
|
||||
assertEquals(expectedMixture, PlainTextLinksConverter.convertLinksToHtml(textWithMixturePlainTextAndHtml));
|
||||
|
||||
final String textWithSpecialChars = "text'" + link1 + " and=" + link2;
|
||||
final String expectedWithSpecialChars = "text'" + makeLinkHtml(link1) + " and=" + makeLinkHtml(link2);
|
||||
assertEquals(expectedWithSpecialChars, PlainTextLinksConverter.convertLinksToHtml(textWithSpecialChars));
|
||||
|
||||
final String linkWithParams = "http://t.me/link#mark?param1=1¶m2=true;param3=true";
|
||||
final String textWithParams = "text " + linkWithParams + " after-text";
|
||||
final String expectedWithParams = "text " + makeLinkHtml(linkWithParams) + " after-text";
|
||||
assertEquals(expectedWithParams, PlainTextLinksConverter.convertLinksToHtml(textWithParams));
|
||||
|
||||
final String linkWithComma = "https://example.org/%D0%%86_(%D1%%BC,_2020";
|
||||
final String textWithComma = "text " + linkWithComma;
|
||||
assertEquals("text " + makeLinkHtml(linkWithComma), PlainTextLinksConverter.convertLinksToHtml(textWithComma));
|
||||
|
||||
final String linkWithDot = "https://www.ietf.org/rfc/rfc3986.txt";
|
||||
final String textWithDot = "text " + linkWithDot;
|
||||
assertEquals("text " + makeLinkHtml(linkWithDot), PlainTextLinksConverter.convertLinksToHtml(textWithDot));
|
||||
|
||||
final String linkWithTilda = "https://www.example.org/valid/-~.,$/url/";
|
||||
final String textWithTilda = "text " + linkWithTilda;
|
||||
assertEquals("text " + makeLinkHtml(linkWithTilda), PlainTextLinksConverter.convertLinksToHtml(textWithTilda));
|
||||
|
||||
final String linkWithExclamation = "http://www.example.com/index.php?id=123&v=wall#!/index.php?id=234";
|
||||
final String textWithExclamation = "text " + linkWithExclamation;
|
||||
assertEquals("text " + makeLinkHtml(linkWithExclamation),
|
||||
PlainTextLinksConverter.convertLinksToHtml(textWithExclamation));
|
||||
|
||||
final String linkWithBrackets = "http://www.example.com/index.php?bar[]=1&bar[]=2";
|
||||
final String textWithBrackets = "text " + linkWithBrackets;
|
||||
assertEquals("text " + makeLinkHtml(linkWithBrackets),
|
||||
PlainTextLinksConverter.convertLinksToHtml(textWithBrackets));
|
||||
|
||||
final String linkWithAsterisk = "https://archive.org/web/*/http://www.example.com/";
|
||||
final String textWithAsterisk = "text " + linkWithAsterisk;
|
||||
assertEquals("text " + makeLinkHtml(linkWithAsterisk),
|
||||
PlainTextLinksConverter.convertLinksToHtml(textWithAsterisk));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBrokenLinksAreNotCreated() {
|
||||
final String linkWithBrackets = "Sign up now (http://example.com/abc)";
|
||||
assertEquals(linkWithBrackets, PlainTextLinksConverter.convertLinksToHtml(linkWithBrackets));
|
||||
|
||||
final String linkWithBrackets2 = "Sign up now (http://example.com/abc)! please";
|
||||
assertEquals(linkWithBrackets2, PlainTextLinksConverter.convertLinksToHtml(linkWithBrackets2));
|
||||
|
||||
final String linkWithDot = "To read on, visit https://example.com.";
|
||||
assertEquals(linkWithDot, PlainTextLinksConverter.convertLinksToHtml(linkWithDot));
|
||||
|
||||
//we choose to ignore links like this, even though they are valid
|
||||
final String validLinkIgnored = "Visit https://example.com/wiki_(url+rules)";
|
||||
assertEquals(validLinkIgnored, PlainTextLinksConverter.convertLinksToHtml(validLinkIgnored));
|
||||
|
||||
final String link = "https://example.com/abc";
|
||||
NOT_ALLOWED_END_CHARS.forEach(end ->
|
||||
assertEquals(link + end, PlainTextLinksConverter.convertLinksToHtml(link + end))
|
||||
);
|
||||
|
||||
final String firstLinkIgnored = "(" + link + ") and " + link;
|
||||
assertEquals("(https://example.com/abc) and " + makeLinkHtml(link),
|
||||
PlainTextLinksConverter.convertLinksToHtml(firstLinkIgnored));
|
||||
|
||||
final String secondLinkIgnored = "text " + link + " and (" + link + ")";
|
||||
assertEquals("text " + makeLinkHtml(link) + " and (https://example.com/abc)",
|
||||
PlainTextLinksConverter.convertLinksToHtml(secondLinkIgnored));
|
||||
|
||||
final String middleLinkIgnored = "text " + link + " and (" + link + ") and " + link;
|
||||
assertEquals("text " + makeLinkHtml(link) + " and (https://example.com/abc) and " + makeLinkHtml(link),
|
||||
PlainTextLinksConverter.convertLinksToHtml(middleLinkIgnored));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExistingLinksArePreserved() {
|
||||
var links = List.of(
|
||||
"Click <a alt=\"abc\" href=\"http://url.to/link\">http://url.to/link, this link</a>",
|
||||
"<a href=\"http://domain.org/link\">domain.org</a>",
|
||||
"you can find it on <a href=\"http://xy.org\">our new website http://xy.org</a>",
|
||||
"you can find it on <a href=\"http://xy.org/newlanding\">our new website http://xy.org</a>",
|
||||
"<p><img src=\"https://url.to/i.jpg\" alt=\"https://url.to/i.jpg\"></p>",
|
||||
"text \n<audio src=\"https://url.to/i.mp3\" alt=\"https://url.to/i.mp3\">\n text \n</audio>",
|
||||
"<a href=\"https://example.com/p/ai-fakers?utm_source=example&utm_medium=email\">AI interview</a> - <em>01:57:01</em>",
|
||||
"sign up for our premium feed here! <a href=\"https://www.example.com/url?q=https://example.com/join&source=gmail-imap&ust=123&usg=AOvVaw123gzEv9s9\"><strong>https://example.com/join</strong></a>",
|
||||
"you can do so here:<a href=\"https://www.example.com/url?q=https://example.com/button&source=gmail-imap&ust=123&usg=AOvV123jw--CX123tATY\"><strong>https://example.com/button</strong></a>",
|
||||
"LINKS:<a href=\"https://www.example.com/url?q=https://example.org/&source=gmail-imap&ust=123&usg=AOvVa123GJxenALD\"><strong>Example</strong></a>",
|
||||
"<a href=\"https://www.example.com/url?q=https://example.org/buttons/ask-me-chili-cheese-fries&source=gmail-imap&ust=123&usg=AOvVaw2oFNwzuvrfrokwHf6zq1P4\"><strong>Example</strong></a>",
|
||||
"<p><a href=\"https://example.com/media/FN_123zV2i?format=png&name=900x900\">A picture of the photo in question</a></p>",
|
||||
"<a href=\"https://www.example.com/redirect?event=video_description&redir_token=123l&q=https%3A%2F%2Fexample.com%2Fshop%2Fbook%2F&v=4iOzkYTrjzg\">https://example.com/shop/book/</a>",
|
||||
"<a href=\"https://www.example.com/redirect?event=video_description&redir_token=123Ws&q=https%3A%2F%2Fexample.me%2FyH6x%2Fgx5ywe7g&v=yIbY7x5zQO8\">https://example.me/yH6x/gx5ywe7g</a>",
|
||||
""
|
||||
);
|
||||
links.forEach(link -> assertEquals(link, PlainTextLinksConverter.convertLinksToHtml(link)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConvertToHtmlWhenNoLinksAreDetected() {
|
||||
assertNull(PlainTextLinksConverter.convertLinksToHtml((String) null));
|
||||
assertEquals("", PlainTextLinksConverter.convertLinksToHtml(""));
|
||||
|
||||
final String text = "plain text";
|
||||
assertEquals(text, PlainTextLinksConverter.convertLinksToHtml(text));
|
||||
|
||||
final String specialCharacters = "text with ' special \" characters !@#$%^&*()<>?123";
|
||||
var expected = specialCharacters.replace("&", "&");
|
||||
expected = expected.replace("<", "<");
|
||||
expected = expected.replace(">", ">");
|
||||
assertEquals(expected, PlainTextLinksConverter.convertLinksToHtml(specialCharacters));
|
||||
|
||||
final String textWithDots = "\"Text With...Dots Works\"";
|
||||
assertEquals(textWithDots, PlainTextLinksConverter.convertLinksToHtml(textWithDots));
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds {@code <a href>..</a>} around provided string
|
||||
*/
|
||||
private static String makeLinkHtml(String plain) {
|
||||
if (plain == null || plain.isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
String encodedPlain = plain.replace("&", "&");
|
||||
return "<a href=\"" + encodedPlain + "\">" + encodedPlain + "</a>";
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user