-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_folder.py
83 lines (73 loc) · 3.19 KB
/
extract_folder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import sys
import pathspec
def read_gitignore(input_dir):
    """
    Build a pathspec matcher from the ``.gitignore`` found directly in *input_dir*.

    :param input_dir: Directory that is expected to contain a ``.gitignore`` file.
    :return: A ``pathspec.PathSpec`` compiled with the ``gitwildmatch`` syntax.
             When no ``.gitignore`` file exists, the spec is built from an empty
             pattern list.
    """
    ignore_file = os.path.join(input_dir, ".gitignore")

    # Guard clause: without a .gitignore, compile a spec with no patterns.
    if not os.path.isfile(ignore_file):
        return pathspec.PathSpec.from_lines("gitwildmatch", [])

    # The open file is handed over as the iterable of pattern lines.
    with open(ignore_file, "r") as handle:
        return pathspec.PathSpec.from_lines("gitwildmatch", handle)
def _parse_extensions(extensions):
    """Normalize *extensions* to a list of bare suffixes, or None for "all files"."""
    if not extensions:
        return None
    if isinstance(extensions, str):
        # The CLI passes one comma-separated string such as "py, md".
        extensions = extensions.split(",")
    # Accept both "py" and ".py"; drop blanks produced by stray commas.
    cleaned = [ext.strip().lstrip(".") for ext in extensions]
    cleaned = [ext for ext in cleaned if ext]
    return cleaned or None


def _git_path(rel_root, name, is_dir=False):
    """Return the path of *name* relative to the walked root, as given to the ignore spec."""
    rel_path = os.path.normpath(os.path.join(rel_root, name))
    # A trailing slash marks directories so that directory-only patterns
    # (e.g. "build/") can match the directory itself.
    return rel_path + "/" if is_dir else rel_path


def structure_directory_content(input_dir, output_file=None, extensions=None):
    """
    Walk the input directory recursively and concatenate the contents of all
    matching text files into one output file.

    Every included file is written as a ``# <path relative to input_dir>``
    header line, followed by the file content and a blank separator line.
    Paths matched by the ``.gitignore`` in *input_dir* are skipped, and ignored
    directories are not descended into. Files that cannot be read or decoded
    as UTF-8 are skipped. The output file itself is never included, even when
    it is located inside *input_dir*. Entries are processed in sorted order so
    the result does not depend on filesystem enumeration order.

    :param input_dir: The input directory to search for files.
    :param output_file: The output file where the content will be structured.
                        If None or empty, 'data.<extension>' is used when
                        exactly one extension is given, otherwise 'data.txt'.
    :param extensions: A comma-separated string (e.g. ``"py,md"``) or an
                       iterable of file extensions to include, with or without
                       a leading dot. If None, empty, or containing no usable
                       entry, all files are included.
    """
    gitignore_spec = read_gitignore(input_dir)
    wanted = _parse_extensions(extensions)

    if not output_file:
        if wanted is not None and len(wanted) == 1:
            output_file = f"data.{wanted[0]}"
        else:
            output_file = "data.txt"

    # str.endswith accepts a tuple, so build the suffixes once up front.
    suffixes = tuple(f".{ext}" for ext in wanted) if wanted is not None else None

    # Resolved once so the output file can be recognised during the walk.
    output_real = os.path.realpath(output_file)

    # UTF-8 on both sides: content decoded as UTF-8 must be writable
    # regardless of the platform's locale encoding.
    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(input_dir):
            # .gitignore patterns are relative to the directory that holds the
            # .gitignore, so match against paths relative to input_dir.
            rel_root = os.path.relpath(root, input_dir)

            # Assigning to dirs[:] prunes the walk in place.
            dirs[:] = sorted(
                d
                for d in dirs
                if not gitignore_spec.match_file(_git_path(rel_root, d, is_dir=True))
            )

            for name in sorted(files):
                rel_path = _git_path(rel_root, name)
                if gitignore_spec.match_file(rel_path):
                    continue
                if suffixes is not None and not name.endswith(suffixes):
                    continue

                file_path = os.path.join(root, name)
                # Never feed the (partially written) output file back into itself.
                if os.path.realpath(file_path) == output_real:
                    continue

                try:
                    with open(file_path, "r", encoding="utf-8") as infile:
                        data = infile.read()
                except (UnicodeDecodeError, OSError):
                    # Binary or unreadable file: skip it and keep going.
                    continue

                # Written only after a successful read, so skipped files leave
                # no dangling header in the output.
                outfile.write(f"# {rel_path}\n")
                outfile.write(data)
                outfile.write("\n\n")
if __name__ == "__main__":
    # Script entry point. Usage:
    #   extract_folder.py                         -> prompt for every value
    #   extract_folder.py DIR [OUTPUT] [EXTS]     -> take values from argv
    if len(sys.argv) == 1:
        # No CLI arguments: ask interactively. An empty directory answer falls
        # back to the current directory instead of walking "" (which yields
        # nothing and silently produces an empty output file).
        input_directory = input("directory path: ").strip() or "."
        output_filename = input("output file name (optional): ").strip() or None
        file_extensions = (
            input("file extensions separated by commas (optional): ").strip() or None
        )
    else:
        # At least one argument is present here, so argv[1] always exists.
        cli_args = sys.argv[1:]
        input_directory = cli_args[0]
        output_filename = cli_args[1] if len(cli_args) > 1 else None
        file_extensions = cli_args[2] if len(cli_args) > 2 else None

    structure_directory_content(input_directory, output_filename, file_extensions)