forked from scaleway/docs-content
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreplace_concepts.py
221 lines (172 loc) · 7.97 KB
/
replace_concepts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import os
import re
class AddLinkOnFirstConcept:
    """Link the first mention of each product concept to its concepts page.

    Concepts are the ``## `` headings of a product's ``concepts.mdx`` files.
    For every other ``.mdx`` page of the product (except ``index.mdx``), the
    first eligible occurrence of each concept is replaced by a Markdown link
    pointing at the matching heading anchor.
    """

    # Matches a string that contains bold markup (``**...**``) anywhere.
    _BOLD_PATTERN = re.compile(r"\*\*(.*)\*\*")
    # Matches each individual bold span of a line, pairing delimiters in order.
    _BOLD_SPAN_PATTERN = re.compile(r"\*\*(.+?)\*\*")

    def __init__(self):
        """Create the helper; it holds no state."""
        pass

    def to_kebab_case(self, value):
        """Return ``value`` lowercased with whitespace runs replaced by ``-``."""
        return "-".join(value.lower().split())

    def select_folder_to_process(self):
        """Prompt on stdin for the product folder and return the answer."""
        return input("enter the product to process (in the \"category/product\" format: ")

    def create_concepts_list(self, product):
        """Collect the concepts of ``product`` as ``[concept, anchor]`` pairs.

        Walks ``<current working directory>/<product>`` and reads every file
        whose name ends with ``concepts.mdx``. Each line starting with ``## ``
        yields one pair:

        * ``concept``: the heading text, lowercased;
        * ``anchor``: the file path relative to the working directory, without
          the ``.mdx`` suffix, followed by ``/#`` and the kebab-cased heading.

        Args:
            product: Folder of the product, relative to the working directory.

        Returns:
            A list of two-item lists, in the order the headings were found.
        """
        directory_path = os.getcwd()
        concepts_list = []
        for dirpath, _dirnames, filenames in os.walk(f"{directory_path}/{product}"):
            for filename in filenames:
                if not filename.endswith("concepts.mdx"):
                    continue
                file_path = f"{dirpath}/{filename}"
                page_path = file_path.removeprefix(directory_path).removesuffix(".mdx")
                # Anchors are URLs: always use forward slashes, whatever the OS.
                page_path = page_path.replace(os.sep, "/")
                with open(file_path, "r", encoding="utf-8") as file:
                    for line in file:
                        if not line.startswith("## "):
                            continue
                        title = line.strip("# \n")
                        if not title:
                            # An empty concept would match every line of every page.
                            continue
                        concept_anchor = f"{page_path}/#{self.to_kebab_case(title)}"
                        concepts_list.append([title.lower(), concept_anchor])
        return concepts_list

    def create_files_to_update_list(self, product):
        """Return the paths of the ``.mdx`` pages under ``product`` to update.

        ``index.mdx`` and ``concepts.mdx`` files are left out.
        """
        excluded = ("index.mdx", "concepts.mdx")
        return [
            f"{dirpath}/{filename}"
            for dirpath, _dirnames, filenames in os.walk(product)
            for filename in filenames
            if filename.endswith(".mdx") and filename not in excluded
        ]

    @staticmethod
    def check_if_concept_in_other_concept(concepts_list):
        """Report whether any concept name is contained in another one.

        Declared as a static method so that it can be called both on the class
        (as before) and on an instance.

        Args:
            concepts_list: Either plain concept names, or ``[concept, anchor]``
                pairs as returned by ``create_concepts_list``.

        Returns:
            True if at least one concept name is a substring of a different
            entry's name, False otherwise. Each hit is also printed.
        """
        names = [
            entry if isinstance(entry, str) else entry[0]
            for entry in concepts_list
        ]
        skip_concept = False
        for position, name in enumerate(names):
            for other_position, other in enumerate(names):
                # An entry always contains itself; only compare distinct entries.
                if position != other_position and name in other:
                    skip_concept = True
                    print(f"Concept \"{name}\" is contained in \n{other}\n")
        return skip_concept

    def check_if_skip_line(self, current_line, skip_line_toggle, old_string):
        """Decide whether ``current_line`` must be left untouched.

        A line is skipped when it is a frontmatter delimiter (``---``), a code
        fence (a line starting with three backticks, with or without a
        language identifier), lies between two such delimiters, is a title of
        level 1 to 4, or shows the concept in bold.

        Frontmatter and code fences share the single ``skip_line_toggle``
        state, and any ``---`` line flips it, wherever it is in the file.

        Args:
            current_line: The line being examined.
            skip_line_toggle: True while inside a frontmatter or code block.
            old_string: The concept that is being searched for.

        Returns:
            A ``(skip_line, skip_line_toggle)`` tuple: whether to skip this
            line, and the toggle state to pass in for the next line.
        """
        is_frontmatter_delimiter = current_line.rstrip() == "---"
        # Fences may be indented (e.g. in a list item) and may carry a language.
        is_code_fence = current_line.strip().startswith("```")
        if is_frontmatter_delimiter or is_code_fence:
            # The delimiter itself is never edited; it opens or closes a region.
            return True, not skip_line_toggle

        if skip_line_toggle:
            return True, True

        if current_line.startswith(("# ", "## ", "### ", "#### ")):
            return True, False

        # The concept itself carries bold markup.
        if self._BOLD_PATTERN.search(old_string):
            return True, False

        # The concept appears inside a bold span of this line.
        for bold_span in self._BOLD_SPAN_PATTERN.finditer(current_line):
            if old_string in bold_span.group(1):
                return True, False

        # TODO Check if in inline code
        return False, False

    def line_by_line_replace(self, current_file, old_string, new_string):
        """Replace the first eligible occurrence of ``old_string`` in a file.

        Lines are examined in order; the first one that contains
        ``old_string`` and is not rejected by ``check_if_skip_line`` gets its
        first occurrence replaced by ``new_string``. The file is written back
        only when a replacement was made.

        Args:
            current_file: Path of the file to update.
            old_string: Text to look for.
            new_string: Text to put in its place.

        Returns:
            True if the file was modified, False otherwise.
        """
        if not old_string:
            # An empty search string is contained in every line.
            return False

        with open(current_file, "r", encoding="utf-8") as file_to_process:
            lines_of_file = file_to_process.readlines()

        # Carries the "inside frontmatter / code block" state from line to line.
        skip_line_toggle = False
        for index, line in enumerate(lines_of_file):
            skip_line, skip_line_toggle = self.check_if_skip_line(
                line, skip_line_toggle, old_string
            )
            if skip_line or old_string not in line:
                continue
            lines_of_file[index] = line.replace(old_string, new_string, 1)
            with open(current_file, "w", encoding="utf-8") as file_to_write:
                file_to_write.writelines(lines_of_file)
            return True
        return False

    def replace(self, product="serverless/jobs"):
        """Link the first occurrence of every concept in every page.

        Args:
            product: Folder of the product to process, in the
                ``category/product`` format. ``select_folder_to_process`` can
                be used to ask the user for it.
        """
        concepts_list = self.create_concepts_list(product)
        files_list = self.create_files_to_update_list(product)
        for file in files_list:
            for old_string, concept_anchor in concepts_list:
                new_string = f"[{old_string}]({concept_anchor})"
                # Re-read for each concept: earlier replacements changed the file.
                with open(file, encoding="utf-8") as file_to_check:
                    already_linked = new_string in file_to_check.read()
                if already_linked:
                    print(f"{new_string} already in {file}")
                elif self.line_by_line_replace(file, old_string, new_string):
                    print(f"{old_string} replaced by {new_string} in file {file}.")
        return
# TODO
# - Add tests
# - Address case where concept is part of a slug/URL "(/(.*)concept(.*)/)"
# - Address case where concept is part of another concept (e.g. "serverless" and "Serverless Framework")
# - Address case when concept is in plural form (e.g. [job](link)s ). concept must have space or punctuation after (or no letter).
# - Monospace
# - if concept is between "`[any string]`" and "`[any string]`" then DO NOT skip line
# as
# - if concept is between "`[any string]" and "[any string]`" then skip line
# To check programmatically if a markdown string is monospace or bold or italics, ChatGPT provides the answer below:
# Example in Python with markdown-it-py:
# You can use the markdown-it-py library to parse the Markdown into tokens and inspect them to detect different formatting.
# Install the library:
# bash
# pip install markdown-it-py
# Code Example:
# python
# from markdown_it import MarkdownIt
# # Function to check markdown formatting
# def check_formatting(markdown_text):
# md = MarkdownIt()
# tokens = md.parse(markdown_text)
# for token in tokens:
# if token.type == 'inline':
# for child in token.children:
# if child.type == 'code_inline':
# print("Monospace text:", child.content)
# elif child.type == 'strong_open':
# print("Bold text detected")
# elif child.type == 'em_open':
# print("Italics text detected")
# # Example usage
# markdown_string = "Here is some **bold** text, *italics*, and `monospace` code."
# check_formatting(markdown_string)
# Explanation:
# The MarkdownIt parser breaks down the Markdown string into tokens.
# We loop through these tokens and check for specific types (code_inline for inline monospace, strong_open for bold, and em_open for italics).
# Output:
# scss
# Bold text detected
# Italics text detected
# Monospace text: monospace
# Parsing Bold, Italics, and Monospace
# Monospace (inline code) is detected by the code_inline token.
# Bold text is detected by strong_open and strong_close tokens.
# Italics text is detected by em_open and em_close tokens.
# This approach allows you to programmatically check for these specific formatting patterns in any Markdown string.