Skip to content

Commit

Permalink
refactor: dump.misc
Browse files Browse the repository at this point in the history
  • Loading branch information
yzqzss committed Sep 19, 2023
1 parent 19f8dd6 commit f3e4abc
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 56 deletions.
9 changes: 6 additions & 3 deletions wikiteam3/dumpgenerator/dump/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,12 @@ def __init__(params=None):
else:
DumpGenerator.createNewDump(config=config, other=other)

save_IndexPHP(config=config, session=other["session"])
save_SpecialVersion(config=config, session=other["session"])
save_siteinfo(config=config, session=other["session"])
if config.index:
save_IndexPHP(config=config, session=other["session"])
save_SpecialVersion(config=config, session=other["session"])
if config.api:
save_siteinfo(config=config, session=other["session"])

mark_as_done(config=config, mark=ALL_DUMPED_MARK)
bye(config.path)
if other["upload"]:
Expand Down
12 changes: 9 additions & 3 deletions wikiteam3/dumpgenerator/dump/misc/index_php.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,21 @@
from wikiteam3.utils import remove_IP
from wikiteam3.dumpgenerator.config import Config

def save_IndexPHP(config: Config=None, session=None):
def save_IndexPHP(config: Config, session):
    """Save index.php as .html, to preserve license details available at the bottom of the page"""

assert config.index

if os.path.exists("%s/index.html" % (config.path)):
print("index.html exists, do not overwrite")
else:
print("Downloading index.php (Main Page) as index.html")
r = session.post(url=config.index, params=None, timeout=10)
raw = str(r.text)
try:
r = session.post(url=config.index, params=None, timeout=10)
raw = str(r.text)
except Exception as e:
print("Error: %s" % (e))
return
Delay(config=config)
raw = remove_IP(raw=raw)
with open("%s/index.html" % (config.path), "w", encoding="utf-8") as outfile:
Expand Down
96 changes: 49 additions & 47 deletions wikiteam3/dumpgenerator/dump/misc/site_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,53 @@
def save_siteinfo(config: Config, session: requests.Session):
    """Save the wiki's site metadata as ``siteinfo.json`` in the dump directory.

    Queries the MediaWiki API ``meta=siteinfo`` module and writes the raw JSON
    response, pretty-printed, to ``<config.path>/siteinfo.json``. If the file
    already exists it is left untouched (resume-friendly).

    Because older MediaWiki releases reject unknown ``siprop`` values, the
    request is retried with progressively smaller property sets:
      1. MediaWiki 1.13+  — full property list (extensions, skins, rights, ...)
      2. MediaWiki 1.11–1.12 — general/namespaces/statistics/dbrepllag/interwikimap
      3. MediaWiki 1.8–1.10  — general/namespaces only
    A response lacking the "query" key signals the request was rejected and
    triggers the next fallback.

    Args:
        config: Dump configuration; ``config.api`` (API endpoint URL) must be
            set and ``config.path`` is the dump output directory.
        session: Authenticated/configured ``requests`` session to use.
    """

    # Caller (generator.py) only invokes this when an API URL is configured.
    assert config.api

    if os.path.exists("%s/siteinfo.json" % (config.path)):
        print("siteinfo.json exists, do not overwrite")
        return

    print("Downloading site info as siteinfo.json")

    # MediaWiki 1.13+
    r = session.get(
        url=config.api,
        params={
            "action": "query",
            "meta": "siteinfo",
            "siprop": "general|namespaces|statistics|dbrepllag|interwikimap|namespacealiases|specialpagealiases|usergroups|extensions|skins|magicwords|fileextensions|rightsinfo",
            "sinumberingroup": 1,
            "format": "json",
        },
        timeout=10,
    )
    # MediaWiki 1.11-1.12: no "query" key means the request was rejected;
    # retry with the reduced property set supported by these versions.
    if "query" not in get_JSON(r):
        r = session.get(
            url=config.api,
            params={
                "action": "query",
                "meta": "siteinfo",
                "siprop": "general|namespaces|statistics|dbrepllag|interwikimap",
                "format": "json",
            },
            timeout=10,
        )
    # MediaWiki 1.8-1.10: final fallback with the minimal property set.
    if "query" not in get_JSON(r):
        r = session.get(
            url=config.api,
            params={
                "action": "query",
                "meta": "siteinfo",
                "siprop": "general|namespaces",
                "format": "json",
            },
            timeout=10,
        )
    result = get_JSON(r)
    Delay(config=config)
    # ensure_ascii=False keeps non-Latin site names human-readable on disk.
    with open(
        "%s/siteinfo.json" % (config.path), "w", encoding="utf-8"
    ) as outfile:
        outfile.write(json.dumps(result, indent=4, sort_keys=True, ensure_ascii=False))
12 changes: 9 additions & 3 deletions wikiteam3/dumpgenerator/dump/misc/special_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@
def save_SpecialVersion(config: Config, session: requests.Session):
"""Save Special:Version as .html, to preserve extensions details"""

assert config.index

if os.path.exists("%s/SpecialVersion.html" % (config.path)):
print("SpecialVersion.html exists, do not overwrite")
else:
print("Downloading Special:Version with extensions and other related info")
r = session.post(
url=config.index, params={"title": "Special:Version"}, timeout=10
)
try:
r = session.post(
url=config.index, params={"title": "Special:Version"}, timeout=10
)
except Exception as e:
print("Error: %s" % (e))
return
raw = r.text
Delay(config=config)
raw = remove_IP(raw=raw)
Expand Down

0 comments on commit f3e4abc

Please sign in to comment.