1+ import json
2+ import os
3+ import sys
4+ import re
5+ import logging
6+ from os import path
7+ from urllib.request import urlopen, Request
8+
def _attach_file_handler(target, filename, level):
    """Create a UTF-8 file handler that truncates *filename*, add it to *target*, and return it."""
    file_handler = logging.FileHandler(filename=filename, mode='w', encoding='utf-8')
    file_handler.setLevel(level)
    target.addHandler(file_handler)
    return file_handler


# Module logger: everything (DEBUG and up) goes to the main log file, while
# ERROR and above is additionally copied to a separate error-only log file.
# Both files are opened in 'w' mode, so each run starts with empty logs.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
handler = _attach_file_handler(logger, 'hacktricks-preprocessor.log', logging.DEBUG)
handler2 = _attach_file_handler(logger, 'hacktricks-preprocessor-error.log', logging.ERROR)
18+
19+
def findtitle(search, obj, key, path=()):
    """Depth-first search of nested dicts/lists for a dict where ``obj[key] == search``.

    Args:
        search: Value to look for.
        obj: Structure to search; dicts and lists are descended into, any
            other value is a dead end.
        key: Dictionary key whose value is compared against *search*.
        path: Tuple of keys/indexes already traversed to reach *obj*.
            (Inside this function the name shadows the ``os.path`` import.)

    Returns:
        A ``(matching_dict, path_tuple)`` pair for the first match in
        traversal order, or ``None`` when nothing matches.
    """
    if isinstance(obj, dict):
        # A dict is tested itself before any of its values are explored.
        if key in obj and obj[key] == search:
            return obj, path
        children = obj.items()
    elif isinstance(obj, list):
        children = enumerate(obj)
    else:
        return None

    for step, child in children:
        found = findtitle(search, child, key, (*path, step))
        if found is not None:
            return found
    return None
34+
35+
def _fetch_page_title(url, timeout=30):
    """Download *url* and return the text of its ``<title>`` element, or None if absent/empty.

    Network, HTTP and timeout errors propagate to the caller.
    """
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0'})
    # The context manager closes the connection even when read() fails; the
    # timeout keeps a single unresponsive host from hanging the whole run.
    with urlopen(request, timeout=timeout) as response:
        body = response.read()
        charset = response.headers.get_content_charset() or 'utf-8'
    # Decode the bytes properly instead of str(bytes), which would produce a
    # "b'...'" repr with non-ASCII characters turned into \xNN escapes.
    try:
        raw_html = body.decode(charset, errors='replace')
    except LookupError:  # server advertised a charset Python does not know
        raw_html = body.decode('utf-8', errors='replace')
    match = re.search(r'<title(?:\s[^>]*)?>(.*?)</title>', raw_html, re.IGNORECASE | re.DOTALL)
    if match is None:
        return None
    # Collapse newlines/indentation inside the title into single spaces.
    return " ".join(match.group(1).split()) or None


def _chapter_by_source_path(source_path):
    """Return the chapter dict in ``book`` whose ``source_path`` equals *source_path*, or None."""
    found = findtitle(source_path, book, "source_path")
    return None if found is None else found[0]


def ref(matchobj):
    """``re.sub`` callback that renders a ``{{#ref}}...{{#endref}}`` match as an HTML link.

    The link target is capture group 1 of *matchobj*. Reads the module-level
    ``context``, ``book`` and ``current_chapter`` objects.

    * ``http://`` / ``https://`` targets: unless the configured
      ``preprocessor.hacktricks.env`` is ``'dev'``, the page is fetched and its
      ``<title>`` becomes the label. Any failure is logged and the URL itself
      is used as the label, so a broken link never stops the build.
    * Other targets are looked up in ``book`` by ``source_path``, first as
      written and then relative to the current chapter's directory. The label
      is the chapter name, or the title-cased anchor when the target has a
      ``#fragment``.

    Returns:
        The ``<a class="content_ref" ...>`` HTML snippet as a string.

    Raises:
        SystemExit: with status 1 when a local target cannot be resolved.
    """
    href = matchobj.group(1).strip()
    logger.debug(f'Ref match: {href}')
    title = href

    if href.startswith(("http://", "https://")):
        if context['config']['preprocessor']['hacktricks']['env'] != 'dev':
            try:
                title = _fetch_page_title(href) or href
            except Exception as e:  # best effort: don't stop on a broken link
                logger.error(f'Error opening URL {href}: {e}')
    else:
        if href.endswith("/"):
            href = href + "README.md"  # Fix if ref points to a folder
        target = href.split("#")[0]
        resolved = target
        try:
            chapter = _chapter_by_source_path(target)
            if chapter is None:
                # Not found as written: retry relative to the current chapter.
                base_dir = path.dirname(current_chapter['source_path'])
                resolved = path.normpath(path.join(base_dir, target))
                logger.debug(f'Error getting chapter title: {href} trying with relative path {resolved}')
                chapter = _chapter_by_source_path(resolved)
            if chapter is None:
                raise LookupError(f'no chapter with source_path {resolved!r}')
            if "#" in href:
                # Label from the anchor: "some-section" -> "Some Section".
                title = " ".join(href.split("#")[1].split("-")).title()
                logger.debug(f'Ref has # using title: {title}')
            else:
                title = chapter['name']
                logger.debug(f'Recursive title search result: {title}')
        except Exception as e:
            logger.error(f"Error: {e}")
            logger.error(f'Error getting chapter title: {resolved} (ref: {href})')
            sys.exit(1)

    if href.endswith("/README.md"):
        # Rewrite only the trailing component, not every occurrence.
        href = href[:-len("/README.md")] + "/index.html"

    return f"""<a class="content_ref" href="{href}"><span class="content_ref_label">{title}</span></a>"""
90+
91+
def files(matchobj):
    """``re.sub`` callback that renders a ``{{#file}}...{{#endfile}}`` match as an HTML link.

    The file name is capture group 1 of *matchobj*. The directory tree under
    ``<cwd>/src/files`` is searched for a file with exactly that name; the
    search stops at the first hit.

    Returns:
        An ``<a class="content_ref" href="/files/<name>">`` HTML snippet.
        NOTE(review): the href is always ``/files/<name>`` even when the file
        was found in a sub-directory - confirm this matches the site layout.

    Raises:
        SystemExit: with status 1 when the file is not found or the search fails.
    """
    href = matchobj.group(1).strip()
    logger.debug(f'Files match: {href}')

    files_root = os.path.join(os.getcwd(), 'src', 'files')
    found = False
    try:
        for root, _dirs, filenames in os.walk(files_root):
            logger.debug(root)
            logger.debug(filenames)
            if href in filenames:
                logger.debug(f'File search result: {os.path.join(root, href)}')
                found = True
                break  # one hit is enough; no need to walk the rest of the tree
    except Exception as e:
        logger.error(f"Error: {e}")
        logger.error(f'Error searching file: {href}')
        sys.exit(1)

    if not found:
        logger.error(f'Error searching file: {href}')
        sys.exit(1)

    return f"""<a class="content_ref" href="/files/{href}"><span class="content_ref_label">{href}</span></a>"""
119+
120+
def add_read_time(content):
    """Append a reading-time placeholder after the heading that follows ``</style>``.

    Every occurrence of ``</style>``, a newline, and a line starting with
    ``# `` (which must itself be followed by a newline) gets a blank line and
    ``Reading time: {{ #reading_time }}`` inserted right after that heading
    line. Content without such an occurrence is returned unchanged.
    """
    heading_after_style = re.compile(r'(<\/style>\n# .*(?=\n))')
    # \g<0> re-emits the whole match; the placeholder is appended after it.
    return heading_after_style.sub(r'\g<0>\n\nReading time: {{ #reading_time }}', content)
125+
126+
def iterate_chapters(sections):
    """Yield every chapter dict found in *sections*, parents before their sub-items.

    *sections* may be a list of book items or a single item. A dict holding
    ``"Chapter"`` yields that chapter and then recurses into its
    ``"sub_items"``. Dicts holding ``"PartTitle"`` and any other value
    (for example plain strings) yield nothing.
    """
    if isinstance(sections, list):
        # Top-level sections and sub_items are both lists of items.
        for entry in sections:
            yield from iterate_chapters(entry)
        return

    if not isinstance(sections, dict) or "PartTitle" in sections:
        return  # Not a chapter section

    if "Chapter" in sections:
        chapter = sections['Chapter']
        yield chapter
        yield from iterate_chapters(chapter["sub_items"])
137+
138+
if __name__ == '__main__':
    # When the first argument is "supports", exit with status 0 straight away:
    # the remaining argument is just the renderer's name.
    if sys.argv[1:2] == ["supports"]:
        sys.exit(0)

    logger.debug('Started hacktricks preprocessor')

    # stdin carries a JSON array holding the context and the book, in that
    # order. These stay module-level names because ref() reads `context`,
    # `book` and `current_chapter` as globals.
    context, book = json.load(sys.stdin)
    logger.debug(f"Context: {context}")

    # Both patterns are the same for every chapter, so compile them once.
    ref_pattern = re.compile(r'{{[\s]*#ref[\s]*}}(?:\n)?([^\\\n#]*(?:#(.*))?)(?:\n)?{{[\s]*#endref[\s]*}}')
    file_pattern = re.compile(r'{{[\s]*#file[\s]*}}(?:\n)?([^\\\n]*)(?:\n)?{{[\s]*#endfile[\s]*}}')

    for chapter in iterate_chapters(book['sections']):
        logger.debug(f"Chapter: {chapter['path']}")
        current_chapter = chapter
        # Expand {{#ref}} blocks first, then {{#file}} blocks, then add the
        # reading-time placeholder.
        rewritten = ref_pattern.sub(ref, chapter['content'])
        rewritten = file_pattern.sub(files, rewritten)
        chapter['content'] = add_read_time(rewritten)

    # Emit the modified book as JSON on stdout (and mirror it to the debug log).
    content = json.dumps(book)
    logger.debug(content)
    print(content)
0 commit comments