mirror of
https://github.com/AntennaPod/AntennaPod.git
synced 2025-10-29 11:49:33 +00:00
Fix and tune feed item duplicate guesser (#7979)
This commit is contained in:
parent
9db9dc7732
commit
0772b4998d
@ -1,7 +1,6 @@
|
||||
package de.danoeh.antennapod.storage.database;
|
||||
|
||||
import android.content.Context;
|
||||
import android.text.TextUtils;
|
||||
import android.util.Log;
|
||||
import de.danoeh.antennapod.event.FeedListUpdateEvent;
|
||||
import de.danoeh.antennapod.model.download.DownloadError;
|
||||
@ -45,38 +44,6 @@ public abstract class FeedDatabaseWriter {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a FeedItem by its identifying value.
|
||||
*/
|
||||
private static FeedItem searchFeedItemByIdentifyingValue(List<FeedItem> items, FeedItem searchItem) {
|
||||
for (FeedItem item : items) {
|
||||
if (TextUtils.equals(item.getIdentifyingValue(), searchItem.getIdentifyingValue())) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Guess if one of the items could actually mean the searched item, even if it uses another identifying value.
|
||||
* This is to work around podcasters breaking their GUIDs.
|
||||
*/
|
||||
private static FeedItem searchFeedItemGuessDuplicate(List<FeedItem> items, FeedItem searchItem) {
|
||||
// First, see if it is a well-behaving feed that contains an item with the same identifier
|
||||
for (FeedItem item : items) {
|
||||
if (FeedItemDuplicateGuesser.sameAndNotEmpty(item.getItemIdentifier(), searchItem.getItemIdentifier())) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
// Not found yet, start more expensive guessing
|
||||
for (FeedItem item : items) {
|
||||
if (FeedItemDuplicateGuesser.seemDuplicates(item, searchItem)) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds new Feeds to the database or updates the old versions if they already exists. If another Feed with the same
|
||||
* identifying value already exists, this method will add new FeedItems from the new Feed to the existing Feed.
|
||||
@ -108,6 +75,9 @@ public abstract class FeedDatabaseWriter {
|
||||
+ " already exists. Syncing new with existing one.");
|
||||
|
||||
Collections.sort(newFeed.getItems(), new FeedItemPubdateComparator());
|
||||
FeedItemDuplicateGuesserPool newFeedDuplicateGuesser = new FeedItemDuplicateGuesserPool(newFeed.getItems());
|
||||
FeedItemDuplicateGuesserPool savedFeedDuplicateGuesser
|
||||
= new FeedItemDuplicateGuesserPool(savedFeed.getItems());
|
||||
|
||||
if (newFeed.getPageNr() == savedFeed.getPageNr()) {
|
||||
savedFeed.updateFromOther(newFeed);
|
||||
@ -128,7 +98,7 @@ public abstract class FeedDatabaseWriter {
|
||||
for (int idx = 0; idx < newFeed.getItems().size(); idx++) {
|
||||
final FeedItem item = newFeed.getItems().get(idx);
|
||||
|
||||
FeedItem possibleDuplicate = searchFeedItemGuessDuplicate(newFeed.getItems(), item);
|
||||
FeedItem possibleDuplicate = newFeedDuplicateGuesser.guessDuplicate(item);
|
||||
if (!newFeed.isLocalFeed() && possibleDuplicate != null && item != possibleDuplicate) {
|
||||
// Canonical episode is the first one returned (usually oldest)
|
||||
DBWriter.addDownloadStatus(new DownloadResult(item.getTitle(),
|
||||
@ -142,9 +112,9 @@ public abstract class FeedDatabaseWriter {
|
||||
continue;
|
||||
}
|
||||
|
||||
FeedItem oldItem = searchFeedItemByIdentifyingValue(savedFeed.getItems(), item);
|
||||
FeedItem oldItem = savedFeedDuplicateGuesser.findById(item);
|
||||
if (!newFeed.isLocalFeed() && oldItem == null) {
|
||||
oldItem = searchFeedItemGuessDuplicate(savedFeed.getItems(), item);
|
||||
oldItem = savedFeedDuplicateGuesser.guessDuplicate(item);
|
||||
if (oldItem != null) {
|
||||
Log.d(TAG, "Repaired duplicate: " + oldItem + ", " + item);
|
||||
DBWriter.addDownloadStatus(new DownloadResult(item.getTitle(),
|
||||
@ -181,6 +151,7 @@ public abstract class FeedDatabaseWriter {
|
||||
} else {
|
||||
savedFeed.getItems().add(idx, item);
|
||||
}
|
||||
savedFeedDuplicateGuesser.add(item);
|
||||
|
||||
boolean shouldPerformNewEpisodesAction = item.getPubDate() == null
|
||||
|| priorMostRecentDate == null
|
||||
@ -217,7 +188,7 @@ public abstract class FeedDatabaseWriter {
|
||||
Iterator<FeedItem> it = savedFeed.getItems().iterator();
|
||||
while (it.hasNext()) {
|
||||
FeedItem feedItem = it.next();
|
||||
if (searchFeedItemByIdentifyingValue(newFeed.getItems(), feedItem) == null) {
|
||||
if (newFeedDuplicateGuesser.findById(feedItem) == null) {
|
||||
unlistedItems.add(feedItem);
|
||||
it.remove();
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@ public class FeedItemDuplicateGuesser {
|
||||
return sameAndNotEmpty(canonicalizeTitle(item1.getTitle()), canonicalizeTitle(item2.getTitle()));
|
||||
}
|
||||
|
||||
private static String canonicalizeTitle(String title) {
|
||||
public static String canonicalizeTitle(String title) {
|
||||
if (title == null) {
|
||||
return "";
|
||||
}
|
||||
|
||||
@ -0,0 +1,60 @@
|
||||
package de.danoeh.antennapod.storage.database;
|
||||
|
||||
import de.danoeh.antennapod.model.feed.FeedItem;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class FeedItemDuplicateGuesserPool {
|
||||
private final Map<String, List<FeedItem>> normalizedTitles = new HashMap<>();
|
||||
private final Map<String, FeedItem> downloadUrls = new HashMap<>();
|
||||
private final Map<String, FeedItem> identifiers = new HashMap<>();
|
||||
|
||||
public FeedItemDuplicateGuesserPool(List<FeedItem> itemsList) {
|
||||
for (FeedItem item : itemsList) {
|
||||
add(item);
|
||||
}
|
||||
}
|
||||
|
||||
public void add(FeedItem item) {
|
||||
String normalizedTitle = FeedItemDuplicateGuesser.canonicalizeTitle(item.getTitle());
|
||||
if (!normalizedTitles.containsKey(normalizedTitle)) {
|
||||
normalizedTitles.put(normalizedTitle, new java.util.ArrayList<>());
|
||||
}
|
||||
normalizedTitles.get(normalizedTitle).add(item);
|
||||
if (item.getMedia() != null && !StringUtils.isEmpty(item.getMedia().getStreamUrl())
|
||||
&& !downloadUrls.containsKey(item.getMedia().getStreamUrl())) {
|
||||
downloadUrls.put(item.getMedia().getStreamUrl(), item);
|
||||
}
|
||||
if (item.getIdentifyingValue() != null && !identifiers.containsKey(item.getIdentifyingValue())) {
|
||||
identifiers.put(item.getIdentifyingValue(), item);
|
||||
}
|
||||
}
|
||||
|
||||
public FeedItem guessDuplicate(FeedItem searchItem) {
|
||||
if (searchItem.getMedia() != null && !StringUtils.isEmpty(searchItem.getMedia().getStreamUrl())
|
||||
&& downloadUrls.containsKey(searchItem.getMedia().getStreamUrl())) {
|
||||
return downloadUrls.get(searchItem.getMedia().getStreamUrl());
|
||||
}
|
||||
String normalizedTitle = FeedItemDuplicateGuesser.canonicalizeTitle(searchItem.getTitle());
|
||||
List<FeedItem> candidates = normalizedTitles.get(normalizedTitle);
|
||||
if (candidates == null) {
|
||||
return null;
|
||||
}
|
||||
for (FeedItem item : candidates) {
|
||||
if (FeedItemDuplicateGuesser.seemDuplicates(item, searchItem)) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public FeedItem findById(FeedItem item) {
|
||||
if (identifiers.containsKey(item.getIdentifyingValue())) {
|
||||
return identifiers.get(item.getIdentifyingValue());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -1,33 +1,28 @@
|
||||
package de.danoeh.antennapod.net.download.service.episode.autodownload;
|
||||
package de.danoeh.antennapod.storage.database;
|
||||
|
||||
import android.content.Context;
|
||||
|
||||
import androidx.test.platform.app.InstrumentationRegistry;
|
||||
|
||||
import de.danoeh.antennapod.model.download.DownloadError;
|
||||
import de.danoeh.antennapod.model.download.DownloadResult;
|
||||
import de.danoeh.antennapod.model.feed.Feed;
|
||||
import de.danoeh.antennapod.model.feed.FeedItem;
|
||||
import de.danoeh.antennapod.model.feed.FeedItemFilter;
|
||||
import de.danoeh.antennapod.model.feed.FeedMedia;
|
||||
import de.danoeh.antennapod.model.feed.SortOrder;
|
||||
import de.danoeh.antennapod.net.sync.serviceinterface.SynchronizationQueue;
|
||||
import de.danoeh.antennapod.net.sync.serviceinterface.SynchronizationQueueStub;
|
||||
import de.danoeh.antennapod.storage.database.DBReader;
|
||||
import de.danoeh.antennapod.storage.database.DBWriter;
|
||||
import de.danoeh.antennapod.storage.database.FeedDatabaseWriter;
|
||||
import de.danoeh.antennapod.storage.database.PodDBAdapter;
|
||||
import org.junit.After;
|
||||
import de.danoeh.antennapod.storage.preferences.UserPreferences;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.robolectric.RobolectricTestRunner;
|
||||
import org.robolectric.RuntimeEnvironment;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import de.danoeh.antennapod.model.feed.Feed;
|
||||
import de.danoeh.antennapod.model.feed.FeedItem;
|
||||
import de.danoeh.antennapod.model.feed.FeedMedia;
|
||||
import de.danoeh.antennapod.storage.preferences.PlaybackPreferences;
|
||||
import de.danoeh.antennapod.storage.preferences.UserPreferences;
|
||||
|
||||
import static java.util.Collections.singletonList;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
@ -35,40 +30,119 @@ import static org.junit.Assert.assertNotSame;
|
||||
import static org.junit.Assert.assertSame;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* Test class for {@link FeedDatabaseWriter}.
|
||||
*/
|
||||
@RunWith(RobolectricTestRunner.class)
|
||||
public class DbTasksTest {
|
||||
public class FeedDatabaseWriterTest {
|
||||
private Context context;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
context = InstrumentationRegistry.getInstrumentation().getTargetContext();
|
||||
context = RuntimeEnvironment.getApplication();
|
||||
UserPreferences.init(context);
|
||||
PlaybackPreferences.init(context);
|
||||
SynchronizationQueue.setInstance(new SynchronizationQueueStub());
|
||||
|
||||
// create new database
|
||||
PodDBAdapter.init(context);
|
||||
PodDBAdapter.deleteDatabase();
|
||||
PodDBAdapter adapter = PodDBAdapter.getInstance();
|
||||
adapter.open();
|
||||
adapter.close();
|
||||
SynchronizationQueue.setInstance(new SynchronizationQueueStub());
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
DBWriter.tearDownTests();
|
||||
PodDBAdapter.tearDownTests();
|
||||
@Test
|
||||
public void testStoreNewFeed() {
|
||||
Feed feed = createFeed();
|
||||
for (int i = 0; i < 3; i++) {
|
||||
feed.getItems().add(createItem("item-" + i, "Item " + i, feed));
|
||||
}
|
||||
Feed updatedFeed = FeedDatabaseWriter.updateFeed(context, feed, false);
|
||||
List<FeedItem> storedItems = DBReader.getFeedItemList(updatedFeed, FeedItemFilter.unfiltered(),
|
||||
SortOrder.EPISODE_TITLE_A_Z, 0, Integer.MAX_VALUE);
|
||||
assertEquals(3, storedItems.size());
|
||||
for (int i = 0; i < 3; i++) {
|
||||
assertEquals("item-" + i, storedItems.get(i).getItemIdentifier());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddItemsToExistingFeed() {
|
||||
Feed feed = createFeed();
|
||||
for (int i = 0; i < 3; i++) {
|
||||
feed.getItems().add(createItem("item-" + i, "Item " + i, feed));
|
||||
}
|
||||
feed = FeedDatabaseWriter.updateFeed(context, feed, false);
|
||||
|
||||
Feed updatedFeed = createFeed();
|
||||
updatedFeed.setId(feed.getId());
|
||||
for (int i = 3; i < 6; i++) {
|
||||
updatedFeed.getItems().add(createItem("item-" + i, "Item " + i, feed));
|
||||
}
|
||||
FeedDatabaseWriter.updateFeed(context, updatedFeed, false);
|
||||
|
||||
List<FeedItem> dbItems = DBReader.getFeedItemList(feed, FeedItemFilter.unfiltered(),
|
||||
SortOrder.EPISODE_TITLE_A_Z, 0, Integer.MAX_VALUE);
|
||||
assertEquals(6, dbItems.size());
|
||||
for (int i = 0; i < 6; i++) {
|
||||
assertEquals("item-" + i, dbItems.get(i).getItemIdentifier());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddOrUpdateItems() throws ExecutionException, InterruptedException {
|
||||
Feed feed = createFeed();
|
||||
for (int i = 0; i < 3; i++) {
|
||||
feed.getItems().add(createItem("item-" + i, "Item " + i, feed));
|
||||
}
|
||||
feed = FeedDatabaseWriter.updateFeed(context, feed, false);
|
||||
DBReader.getFeedItemList(feed, FeedItemFilter.unfiltered(),
|
||||
SortOrder.EPISODE_TITLE_A_Z, 0, Integer.MAX_VALUE);
|
||||
DBWriter.markItemPlayed(feed.getItems().get(2), FeedItem.PLAYED, false).get();
|
||||
|
||||
Feed updatedFeed = createFeed();
|
||||
updatedFeed.setId(feed.getId());
|
||||
for (int i = 2; i < 5; i++) {
|
||||
updatedFeed.getItems().add(createItem("item-" + i, "Item " + i, feed));
|
||||
}
|
||||
FeedDatabaseWriter.updateFeed(context, updatedFeed, false);
|
||||
|
||||
List<FeedItem> dbItems = DBReader.getFeedItemList(feed, FeedItemFilter.unfiltered(),
|
||||
SortOrder.EPISODE_TITLE_A_Z, 0, Integer.MAX_VALUE);
|
||||
assertEquals(5, dbItems.size());
|
||||
for (int i = 0; i < 5; i++) {
|
||||
assertEquals("item-" + i, dbItems.get(i).getItemIdentifier());
|
||||
}
|
||||
assertEquals(FeedItem.PLAYED, dbItems.get(2).getPlayState());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDuplicateItemsInFeed() {
|
||||
Feed feed = createFeed();
|
||||
feed.getItems().add(createItem("id1", "Duplicate Title", feed));
|
||||
feed.getItems().add(createItem("id2", "Duplicate Title", feed));
|
||||
FeedDatabaseWriter.updateFeed(context, feed, false); // First update just takes the feed without complaining
|
||||
FeedDatabaseWriter.updateFeed(context, feed, false);
|
||||
|
||||
List<DownloadResult> downloadLog = DBReader.getDownloadLog();
|
||||
assertEquals(1, downloadLog.size());
|
||||
assertEquals(DownloadError.ERROR_PARSER_EXCEPTION_DUPLICATE, downloadLog.get(0).getReason());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGuidUpdated() {
|
||||
Feed feed = createFeed();
|
||||
feed.getItems().add(createItem("old-id", "Unique Title", feed));
|
||||
FeedDatabaseWriter.updateFeed(context, feed, false);
|
||||
|
||||
Feed newFeed = createFeed();
|
||||
newFeed.getItems().add(createItem("new-id", "Unique Title", newFeed));
|
||||
Feed stored = FeedDatabaseWriter.updateFeed(context, newFeed, false);
|
||||
|
||||
assertEquals(1, stored.getItems().size());
|
||||
assertEquals("new-id", stored.getItems().get(0).getItemIdentifier());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateFeedNewFeed() {
|
||||
final int numItems = 10;
|
||||
|
||||
Feed feed = new Feed("url", null, "title");
|
||||
feed.setItems(new ArrayList<>());
|
||||
Feed feed = createFeed();
|
||||
for (int i = 0; i < numItems; i++) {
|
||||
feed.getItems().add(new FeedItem(0, "item " + i, "id " + i, "link " + i,
|
||||
new Date(), FeedItem.UNPLAYED, feed));
|
||||
@ -86,12 +160,9 @@ public class DbTasksTest {
|
||||
/** Two feeds with the same title, but different download URLs should be treated as different feeds. */
|
||||
@Test
|
||||
public void testUpdateFeedSameTitle() {
|
||||
|
||||
Feed feed1 = new Feed("url1", null, "title");
|
||||
Feed feed2 = new Feed("url2", null, "title");
|
||||
|
||||
feed1.setItems(new ArrayList<>());
|
||||
feed2.setItems(new ArrayList<>());
|
||||
Feed feed1 = createFeed();
|
||||
Feed feed2 = createFeed();
|
||||
feed2.setDownloadUrl("different url");
|
||||
|
||||
Feed savedFeed1 = FeedDatabaseWriter.updateFeed(context, feed1, false);
|
||||
Feed savedFeed2 = FeedDatabaseWriter.updateFeed(context, feed2, false);
|
||||
@ -104,8 +175,7 @@ public class DbTasksTest {
|
||||
final int numItemsOld = 10;
|
||||
final int numItemsNew = 10;
|
||||
|
||||
final Feed feed = new Feed("url", null, "title");
|
||||
feed.setItems(new ArrayList<>());
|
||||
final Feed feed = createFeed();
|
||||
for (int i = 0; i < numItemsOld; i++) {
|
||||
feed.getItems().add(new FeedItem(0, "item " + i, "id " + i, "link " + i,
|
||||
new Date(i), FeedItem.PLAYED, feed));
|
||||
@ -144,9 +214,9 @@ public class DbTasksTest {
|
||||
|
||||
@Test
|
||||
public void testUpdateFeedMediaUrlResetState() {
|
||||
final Feed feed = new Feed("url", null, "title");
|
||||
final Feed feed = createFeed();
|
||||
FeedItem item = new FeedItem(0, "item", "id", "link", new Date(), FeedItem.PLAYED, feed);
|
||||
feed.setItems(singletonList(item));
|
||||
feed.setItems(Collections.singletonList(item));
|
||||
|
||||
PodDBAdapter adapter = PodDBAdapter.getInstance();
|
||||
adapter.open();
|
||||
@ -173,8 +243,7 @@ public class DbTasksTest {
|
||||
|
||||
@Test
|
||||
public void testUpdateFeedRemoveUnlistedItems() {
|
||||
final Feed feed = new Feed("url", null, "title");
|
||||
feed.setItems(new ArrayList<>());
|
||||
final Feed feed = createFeed();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
feed.getItems().add(
|
||||
new FeedItem(0, "item " + i, "id " + i, "link " + i, new Date(i), FeedItem.PLAYED, feed));
|
||||
@ -195,8 +264,7 @@ public class DbTasksTest {
|
||||
|
||||
@Test
|
||||
public void testUpdateFeedSetDuplicate() {
|
||||
final Feed feed = new Feed("url", null, "title");
|
||||
feed.setItems(new ArrayList<>());
|
||||
final Feed feed = createFeed();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
FeedItem item =
|
||||
new FeedItem(0, "item " + i, "id " + i, "link " + i, new Date(i), FeedItem.PLAYED, feed);
|
||||
@ -249,4 +317,19 @@ public class DbTasksTest {
|
||||
lastDate = item.getPubDate();
|
||||
}
|
||||
}
|
||||
|
||||
private Feed createFeed() {
|
||||
Feed feed = new Feed("url", null, null);
|
||||
feed.setItems(new ArrayList<>());
|
||||
return feed;
|
||||
}
|
||||
|
||||
private FeedItem createItem(String identifier, String title, Feed feed) {
|
||||
FeedItem item = new FeedItem();
|
||||
item.setItemIdentifier(identifier);
|
||||
item.setTitle(title);
|
||||
item.setMedia(new FeedMedia(item, "url-" + title, 2, "mime"));
|
||||
item.setFeed(feed);
|
||||
return item;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,40 @@
|
||||
package de.danoeh.antennapod.storage.database;
|
||||
|
||||
import de.danoeh.antennapod.model.feed.Feed;
|
||||
import de.danoeh.antennapod.model.feed.FeedItem;
|
||||
import de.danoeh.antennapod.model.feed.FeedMedia;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import static org.junit.Assert.assertSame;
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
public class FeedItemDuplicateGuesserPoolTest {
|
||||
|
||||
@Test
|
||||
public void testDuplicateIsConsistent() {
|
||||
Feed feed = new Feed("url", null, null);
|
||||
FeedItem item1 = createItem("id1", "Title", feed);
|
||||
FeedItem item2 = createItem("id2", "Title", feed);
|
||||
|
||||
FeedItemDuplicateGuesserPool pool = new FeedItemDuplicateGuesserPool(new ArrayList<>());
|
||||
pool.add(item1);
|
||||
assertSame(item1, pool.guessDuplicate(item1));
|
||||
assertSame(item1, pool.guessDuplicate(item2));
|
||||
pool.add(item2);
|
||||
assertSame(item1, pool.guessDuplicate(item1));
|
||||
assertSame(item1, pool.guessDuplicate(item2));
|
||||
}
|
||||
|
||||
private FeedItem createItem(String identifier, String title, Feed feed) {
|
||||
FeedItem item = new FeedItem();
|
||||
item.setItemIdentifier(identifier);
|
||||
item.setTitle(title);
|
||||
item.setMedia(new FeedMedia(item, "url-" + title, 2, "mime"));
|
||||
item.setFeed(feed);
|
||||
return item;
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user