From 9770ffa46b2292f4c97a47ec73b3fbba83415db2 Mon Sep 17 00:00:00 2001 From: frank-f Date: Wed, 3 Apr 2024 12:38:20 +0200 Subject: [PATCH] LCSC: Follow first 'pdfUrl' link to get real datasheet URL (#582) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Follow first 'pdfUrl' link to get real datasheet URL * Fix @param * Fix @param * Remove User-Agent header It's not needed - LCSC was just having some server troubles over the weekend * Added comment explaining the json_decode in getRealDatasheetUrl --------- Co-authored-by: Jan Böhmer --- .../Providers/LCSCProvider.php | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php index ee88f27d..beb174e7 100755 --- a/src/Services/InfoProviderSystem/Providers/LCSCProvider.php +++ b/src/Services/InfoProviderSystem/Providers/LCSCProvider.php @@ -90,6 +90,28 @@ private function queryDetail(string $id): PartDetailDTO return $this->getPartDetail($product); } + /** + * @param string $url + * @return String + */ + private function getRealDatasheetUrl(?string $url): string + { + if (!empty($url) && preg_match("/^https:\/\/(datasheet\.lcsc\.com|www\.lcsc\.com\/datasheet)\/.*(C\d+)\.pdf$/", $url, $matches) > 0) { + $response = $this->lcscClient->request('GET', $url, [ + 'headers' => [ + 'Referer' => 'https://www.lcsc.com/product-detail/_' . $matches[2] . '.html' + ], + ]); + if (preg_match('/(pdfUrl): ?("[^"]+wmsc\.lcsc\.com[^"]+\.pdf")/', $response->getContent(), $matches) > 0) { + //HACKY: The URL string contains escaped characters like \u002F, etc. To decode it, the JSON decoding is reused + //See https://github.com/Part-DB/Part-DB-server/pull/582#issuecomment-2033125934 + $jsonObj = json_decode('{"' . $matches[1] . '": ' . $matches[2] . '}'); + $url = $jsonObj->pdfUrl; + } + } + return $url; + } + /** * @param string $term * @return PartDetailDTO[] @@ -273,7 +295,9 @@ private function getProductDatasheets(?string $url): array return []; } - return [new FileDTO($url, null)]; + $realUrl = $this->getRealDatasheetUrl($url); + + return [new FileDTO($realUrl, null)]; } /**