From b1af62d34272875c45ea71fc339125244d6c92a4 Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Sun, 12 Nov 2023 21:40:45 +0100 Subject: [PATCH] #938 use url optimizers in kika crawler --- .../mserver/crawler/ard/ArdUrlOptimizer.java | 41 +++++++++++++++---- .../crawler/kika/tasks/KikaApiFilmTask.java | 27 ++++++++---- .../crawler/zdf/ZdfVideoUrlOptimizer.java | 2 + .../ArdVideoInfoJsonDeserializerTest.java | 4 +- 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdUrlOptimizer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdUrlOptimizer.java index c9b3bdb84..e573021d7 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdUrlOptimizer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdUrlOptimizer.java @@ -2,11 +2,34 @@ import de.mediathekview.mserver.crawler.basic.AbstractCrawler; -public class ArdUrlOptimizer { +import java.util.HashMap; +import java.util.Map; - public static final String ARD_URL_1280 = ".xl.mp4"; - public static final String ARD_URL_1920 = ".xxl.mp4"; +public class ArdUrlOptimizer { + private static final String BR_URL_1280 = "_X.mp4"; + private static final String BR_URL_1920 = "_HD.mp4"; + private static final String HR_URL_1280 = "1280x720-50p-3200kbit.mp4"; + private static final String HR_URL_1920 = "1920x1080-50p-5000kbit.mp4"; + private static final String NDR_URL_1280 = ".hd.mp4"; + private static final String NDR_URL_1920 = ".1080.mp4"; + private static final String RBB_URL_1280 = "hd1080-avc720.mp4"; + private static final String RBB_URL_1920 = "hd1080-avc1080.mp4"; + private static final String SR_URL_1280 = "_P.mp4"; + private static final String SR_URL_1920 = "_H.mp4"; + private static final String SWR_URL_1280 = ".xl.mp4"; + private static final String SWR_URL_1920 = ".xxl.mp4"; + + private static final Map<String, String[]> HD_OPTIMIZE = new HashMap<>(); + + static { + HD_OPTIMIZE.put(BR_URL_1280, new String[] {BR_URL_1920}); +
HD_OPTIMIZE.put(HR_URL_1280, new String[] {HR_URL_1920}); + HD_OPTIMIZE.put(NDR_URL_1280, new String[] {NDR_URL_1920}); + HD_OPTIMIZE.put(RBB_URL_1280, new String[] {RBB_URL_1920}); + HD_OPTIMIZE.put(SR_URL_1280, new String[] {SR_URL_1920}); + HD_OPTIMIZE.put(SWR_URL_1280, new String[] {SWR_URL_1920}); + } protected AbstractCrawler crawler; @@ -15,10 +38,14 @@ public ArdUrlOptimizer(AbstractCrawler aCrawler) { } public String optimizeHdUrl(final String url) { - if (url.contains(ARD_URL_1280)) { - final String optimizedUrl = url.replace(ARD_URL_1280, ARD_URL_1920); - if (crawler.requestUrlExists(optimizedUrl)) { - return optimizedUrl; + for (Map.Entry entry : HD_OPTIMIZE.entrySet()) { + if (url.contains(entry.getKey())) { + for (String optimizeFragment : entry.getValue()) { + final String optimizedUrl = url.replace(entry.getKey(), optimizeFragment); + if (crawler.requestUrlExists(optimizedUrl)) { + return optimizedUrl; + } + } } } diff --git a/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java b/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java index 4d68062cc..085ce3841 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java @@ -17,6 +17,8 @@ import java.util.Set; import java.util.UUID; +import de.mediathekview.mserver.crawler.ard.ArdUrlOptimizer; +import de.mediathekview.mserver.crawler.zdf.ZdfVideoUrlOptimizer; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -41,8 +43,13 @@ public class KikaApiFilmTask extends AbstractJsonRestTask urlToCrawlDTOs) { super(crawler, urlToCrawlDTOs, null); + ardUrlOptimizer = new ArdUrlOptimizer(crawler); + zdfVideoUrlOptimizer = new ZdfVideoUrlOptimizer(crawler); } @Override @@ -122,7 +129,7 @@ protected AbstractRecursiveConverterTask createNewOwnInsta } protected Optional getAiredDateTime(KikaApiFilmDto aDTO) { - 
Optional airedDate = null; + Optional airedDate; if (aDTO.getDate().isPresent()) { airedDate = parseLocalDateTime(aDTO, aDTO.getDate()); } else { @@ -151,12 +158,18 @@ protected Set getSubtitle(KikaApiVideoInfoDto aResponseObj, KikaApiFilmDto protected Map getVideoUrls(KikaApiVideoInfoDto aResponseObj, KikaApiFilmDto aDTO) { Map urls = new EnumMap<>(Resolution.class); for (Map.Entry element : aResponseObj.getVideoUrls().entrySet()) { - try { - final FilmUrl filmUrl = new FilmUrl(element.getValue(), crawler.determineFileSizeInKB(element.getValue())); - urls.put(element.getKey(), filmUrl); - } catch (MalformedURLException e) { - LOG.error("Invalid video url {} for {} error {}", element.getValue(), aDTO.getUrl(), e); - } + try { + String url = element.getValue(); + if (Resolution.HD.equals(element.getKey())) { + url = ardUrlOptimizer.optimizeHdUrl(url); + url = zdfVideoUrlOptimizer.getOptimizedUrlHd(url); + } + + final FilmUrl filmUrl = new FilmUrl(url, crawler.determineFileSizeInKB(url)); + urls.put(element.getKey(), filmUrl); + } catch (MalformedURLException e) { + LOG.error("Invalid video url {} for {} error {}", element.getValue(), aDTO.getUrl(), e); + } } return urls; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfVideoUrlOptimizer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfVideoUrlOptimizer.java index 50a71acd1..bf07661ca 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfVideoUrlOptimizer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfVideoUrlOptimizer.java @@ -35,6 +35,7 @@ public class ZdfVideoUrlOptimizer { private static final String HD_3296_15_13 = "3296k_p15v13.mp4"; private static final String HD_3296_15_14 = "3296k_p15v14.mp4"; private static final String HD_3328_15_15 = "3328k_p15v15.mp4"; + private static final String HD_3328_15_17 = "3328k_p15v17.mp4"; private static final String HD_3328_12 = "3328k_p36v12.mp4"; private static final String HD_3328_13 = "3328k_p36v13.mp4"; 
private static final String HD_3328_14 = "3328k_p36v14.mp4"; @@ -78,6 +79,7 @@ public class ZdfVideoUrlOptimizer { HD_OPTIMIZE.put(HD_3360_36_17, new String[] {HD_6660_37_17, HD_6628_61_17}); HD_OPTIMIZE.put(HD_6628_61_17, new String[] {HD_6660_37_17}); + HD_OPTIMIZE.put(HD_3328_15_17, new String[] {HD_6660_37_17, HD_6628_61_17, HD_3360_36_17}); HD_OPTIMIZE.put(HD_3328_15_15, new String[] {HD_3360_36_15}); HD_OPTIMIZE.put(HD_3256, new String[] {HD_3328_12}); HD_OPTIMIZE.put(HD_3296_15_14, new String[] {HD_3328_36_14}); diff --git a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializerTest.java index caf8df929..4ef1bd46d 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializerTest.java @@ -72,7 +72,7 @@ public static Collection data() { "", "https://mediastorage01.sr-online.de/Video/UD/DOKU/1505155201_20170911_KANDIDATENCHECK_LUKSIC_M.mp4", "https://srstorage01-a.akamaihd.net/Video/UD/DOKU/1505155201_20170911_KANDIDATENCHECK_LUKSIC_L.mp4", - "https://srstorage01-a.akamaihd.net/Video/UD/DOKU/1505155201_20170911_KANDIDATENCHECK_LUKSIC_P.mp4" + "https://srstorage01-a.akamaihd.net/Video/UD/DOKU/1505155201_20170911_KANDIDATENCHECK_LUKSIC_H.mp4" }, { "/ard/ard_video_use_http_url.json", @@ -144,7 +144,7 @@ public static Collection data() { "", "https://srstorage01-a.akamaihd.net/Video/FS/SA/sportarena_20190815_184401_M.mp4", "https://srstorage01-a.akamaihd.net/Video/FS/SA/sportarena_20190815_184401_L.mp4", - "https://srstorage01-a.akamaihd.net/Video/FS/SA/sportarena_20190815_184401_P.mp4" + "https://srstorage01-a.akamaihd.net/Video/FS/SA/sportarena_20190815_184401_H.mp4" }, { "/ndr/ndr_film_detail_m3u8.json",