doc_infra/parsers/nginx.py

77 lines
2.4 KiB
Python
Raw Permalink Normal View History

2025-01-28 22:14:34 +01:00
#!/bin/env python3
import sys
import bs4
import requests
from urllib.parse import urlparse, urlunparse
from os import listdir
from os.path import isfile, join
def find_icon(domain, timeout=10):
    """Return an absolute URL for the favicon advertised by *domain*.

    Fetches ``http://<domain>/`` and returns the ``href`` of the first
    ``<link>`` element whose ``rel`` contains ``icon`` (compared
    case-insensitively), resolved against the site root.  When the page
    declares no usable icon link, ``http://<domain>/favicon.ico`` is
    returned instead.

    Args:
        domain: Host name to query, without a scheme.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The icon URL as a string.

    Raises:
        requests.RequestException: If the front page cannot be fetched.
    """
    # Local import: the module header only pulls in urlparse/urlunparse.
    from urllib.parse import urljoin

    base = "http://{}/".format(domain)
    resp = requests.get(base, timeout=timeout)
    page = bs4.BeautifulSoup(resp.text, 'html.parser')
    for link in page.find_all(name='link'):
        # <link> elements without a rel attribute yield None here; treat
        # that as "no relations" instead of crashing on `in None`.
        rel = link.attrs.get('rel') or []
        if isinstance(rel, str):
            rel = rel.split()
        href = link.attrs.get('href')
        if href and 'icon' in [token.lower() for token in rel]:
            # urljoin handles absolute, protocol-relative ("//cdn/x.png")
            # and relative hrefs alike, and keeps any query string.
            return urljoin(base, href)
    return "http://{}/favicon.ico".format(domain)
def download(domain, icon_url, timeout=10):
    """Download *icon_url* and store it as ``<domain>.<ext>``.

    The file is written to the current working directory.  The extension
    is taken from the last path segment of the URL and defaults to
    ``ico`` when that segment has no alphanumeric suffix.

    Args:
        domain: Name used as the stem of the output file.
        icon_url: Absolute URL of the icon to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        *icon_url*, unchanged.

    Raises:
        requests.RequestException: If the icon cannot be fetched.
    """
    # Look only at the URL path so that a query string ("?v=2") or a dot
    # in the host name cannot end up in the file extension.
    basename = urlparse(icon_url).path.rpartition('/')[2]
    _, dot, suffix = basename.rpartition('.')
    ext = suffix if dot and suffix.isalnum() else 'ico'
    fname = "{}.{}".format(domain, ext)
    resp = requests.get(icon_url, timeout=timeout)
    # NOTE(review): the body is written regardless of the HTTP status, so
    # an error page may be saved as the icon - confirm this is acceptable.
    with open(fname, 'wb') as out:
        out.write(resp.content)
    return icon_url
def _split_proxy_target(target):
    """Split a ``proxy_pass`` target into ``(dest, port)``.

    The scheme is dropped and any URI path following the port is appended
    to the host, so ``http://10.0.0.5:8080/api/`` becomes
    ``("10.0.0.5/api/", "8080")``.  ``port`` is an empty string when the
    target names no explicit port.
    """
    authority, slash, path = target.split("://", 1)[-1].partition("/")
    host, colon, port = authority.rpartition(":")
    if not colon:
        # No explicit port, e.g. "http://backend".
        return authority + slash + path, ""
    return host + slash + path, port


def _iter_routes(lines):
    """Yield ``(server_name, dest, port)`` for every ``proxy_pass`` line.

    Only lines whose first token is exactly ``server_name`` or
    ``proxy_pass`` are considered; this also skips comment lines and
    look-alike directives such as ``proxy_pass_header``.
    """
    server_name = ""
    for line in lines:
        tokens = line.split()
        if len(tokens) < 2:
            continue
        value = tokens[1].replace(";", "")
        if tokens[0] == "server_name":
            server_name = value
        elif tokens[0] == "proxy_pass":
            dest, port = _split_proxy_target(value)
            yield server_name, dest, port


def main(conf_dir="/var/home/mika/projects/dev/nginx-config/conf.d/"):
    """Print icon and proxy-route lines for every nginx conf file.

    Each regular file in *conf_dir* whose name ends in ``conf`` is scanned
    for ``server_name`` and ``proxy_pass`` directives.  For every server
    name with a complete route (destination and port) the site icon is
    downloaded and two lines are printed to stdout: the icon declaration
    and the ``name -> destination: port`` mapping.  Incomplete routes are
    reported on stderr and skipped.

    Args:
        conf_dir: Directory holding the nginx configuration files.
    """
    result = {}
    # Sorted so the output order does not depend on directory order.
    for fname in sorted(listdir(conf_dir)):
        path = join(conf_dir, fname)
        if not (fname.endswith("conf") and isfile(path)):
            continue
        with open(path, 'r', encoding="utf-8") as f:
            lines = f.read().splitlines()
        # Parsing state lives inside _iter_routes, so a server_name from
        # one file can never leak into the next.
        for url, dest, port in _iter_routes(lines):
            if not all((url, dest, port)):
                print(
                    "{}: skipping incomplete route {!r} -> {!r}:{!r}".format(
                        path, url, dest, port
                    ),
                    file=sys.stderr,
                )
                continue
            result[url] = {"dest": dest, "port": port}

    for url, route in result.items():
        # Icons are fetched once per server name, after parsing, so
        # several proxy_pass lines for one server cost a single download.
        route["icon"] = download(url, find_icon(url))
        print('''"{url}": {{icon: {icon} }}'''.format(
            url=url,
            icon=route["icon"],
        ))
        print('''"{url}" -> "{dest}": "{port}"'''.format(
            url=url,
            dest=route["dest"],
            port=route["port"],
        ))
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()