Skip to content

Commit

Permalink
Adding tarfile member sanitization to extractall()
Browse files Browse the repository at this point in the history
  • Loading branch information
TrellixVulnTeam committed Oct 15, 2022
1 parent 3cbcd69 commit 8c9d9f5
Showing 1 changed file with 23 additions and 1 deletion.
24 changes: 23 additions & 1 deletion scripts/convert_imdb_reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,29 @@ def main():

if not PATH_DATASETS_IMDB_EXTRACTED.exists():
with tarfile.open(PATH_DATASETS_IMDB) as mytar:
mytar.extractall(PATH_DATASETS_IMDB_EXTRACTED)

import os

def is_within_directory(directory, target):
    """Return True if *target* is *directory* itself or a path beneath it.

    Both paths are made absolute and normalised with ``os.path.abspath``
    (which collapses ``..`` segments but does not resolve symlinks) and are
    then compared component by component.

    Args:
        directory: The directory that must contain *target*.
        target: The path to check.

    Returns:
        True when *target* lies inside *directory*, False otherwise.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    try:
        # commonpath works on whole path components. commonprefix compares
        # character by character, so "/data/imdb" would wrongly be treated as
        # containing "/data/imdb_evil/file".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share an ancestor (for example, paths
        # on different drives on Windows): target is then not inside.
        return False

    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after rejecting members that escape *path*.

    Every member of the archive is validated before anything is written:
    its destination must lie inside *path*, and, for symbolic or hard links,
    the link target must lie inside *path* as well. Otherwise a link member
    could redirect a later member's data outside the extraction directory.

    Args:
        tar: An open ``tarfile.TarFile``.
        path: Directory to extract into.
        members: Optional subset of members, passed through to
            ``TarFile.extractall``. Validation always covers the whole
            archive.
        numeric_owner: Passed through to ``TarFile.extractall``.

    Raises:
        Exception: If a member's path or link target resolves outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        if member.issym():
            # A symlink's target is interpreted relative to the directory
            # that holds the link itself.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # A hard link's target names another member, relative to the
            # archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        if not is_within_directory(path, link_target):
            raise Exception("Attempted Link Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(mytar, PATH_DATASETS_IMDB_EXTRACTED)

positive = [(p, "positive") for p in (PATH_DATASETS_IMDB_TRAIN / "pos").iterdir()]
negative = [(p, "negative") for p in (PATH_DATASETS_IMDB_TRAIN / "neg").iterdir()]
Expand Down

0 comments on commit 8c9d9f5

Please sign in to comment.