Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
AxeemHaider committed Nov 17, 2024
1 parent 6efa389 commit 8ab5fbd
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 24 deletions.
54 changes: 30 additions & 24 deletions extractor/fetch_others.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,33 @@
import json
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from urllib.parse import urljoin, urlparse

def extract_services(service_name):
def extract_services():
# Paths
json_input_path = f"./services/{service_name}.json"
service_name = "others"
service_dir = "./services"
output_dir = f"./others"
images_dir = f"../static/images/{service_name}"

# Ensure directories exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(images_dir, exist_ok=True)

# Load category data
with open(json_input_path, "r") as file:
services = json.load(file)
# Load services data
services = []
for file_name in os.listdir(service_dir):
if file_name.endswith(".json"):
with open(os.path.join(service_dir, file_name), "r") as f:
data = json.load(f)
for entry in data:
services.append(entry)

category_id = "others"

service_data = {}

external_link = f"https://elest.io/fully-managed-services/{service_name}"
external_link = "https://elest.io/fully-managed-services"

# Fetch the external page with explicit encoding
response = requests.get(external_link)
Expand All @@ -36,6 +41,9 @@ def extract_services(service_name):

# Attempt to locate #cards container
cards_container = soup.select_one("#cards") or soup.find("div", id="cards")

print(len(cards_container), "cards found")

if not cards_container:
print(f"No #cards container found for {external_link}")
return
Expand All @@ -48,6 +56,8 @@ def extract_services(service_name):

if id_exist:
continue
else:
print("New services found", service_id)

link = f"/{service_name}/{category_id}/{service_id}"
ext_link = urljoin("https://elest.io/", href)
Expand All @@ -67,14 +77,14 @@ def extract_services(service_name):
logo_filename = f"{logo_path}/logo{logo_ext}"

# Download the logo image
logo_response = requests.get(logo_url, stream=True)
if logo_response.status_code == 200:
with open(logo_filename, "wb") as img_file:
img_file.write(logo_response.content)
logo_path_relative = logo_filename.replace("../static", "")
else:
print(f"Failed to download logo from {logo_url}")
continue
# logo_response = requests.get(logo_url, stream=True)
# if logo_response.status_code == 200:
# with open(logo_filename, "wb") as img_file:
# img_file.write(logo_response.content)
# logo_path_relative = logo_filename.replace("../static", "")
# else:
# print(f"Failed to download logo from {logo_url}")
# continue

# Create service item if it does not exist
if service_id not in service_data:
Expand All @@ -84,7 +94,7 @@ def extract_services(service_name):
"external_link": ext_link,
"title": title,
"description": description,
"logo": logo_path_relative,
# "logo": logo_path_relative,
"category": {
"id": category_id,
"name": "Others",
Expand All @@ -104,12 +114,8 @@ def extract_services(service_name):
json.dump(list(service_data.values()), outfile, indent=4)
print(f"Data saved to {output_path}")

# names = ["databases", "applications", "development", "hosting-and-infrastructure"]
names = ["applications"]

for name in names:
print("===============================================")
print(f" Extracting {name} ")
print("===============================================")
print("===============================================")
print(f" Extracting ")
print("===============================================")

extract_services(name)
extract_services()
1 change: 1 addition & 0 deletions extractor/others/others.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]

0 comments on commit 8ab5fbd

Please sign in to comment.