The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and use the default browser print function instead.
Absolutizing hrefs in HTML
from urllib.parse import urljoin
from xml.etree import ElementTree as ET

import html5lib
def absolute_hrefs(html, baseurl):
    """Return *html* with every ``href`` attribute resolved against *baseurl*.

    The input is parsed as an HTML fragment with html5lib (etree tree
    builder, element names left un-namespaced). Every descendant element
    that carries an ``href`` attribute -- not only ``<a>`` -- has that value
    replaced by ``urljoin(baseurl, href)``, and the tree is then serialized
    back to text.

    Args:
        html: HTML fragment source handed to ``html5lib.parseFragment``.
        baseurl: URL against which each ``href`` value is joined.

    Returns:
        The tree serialized by ``ElementTree.tostring`` with
        ``method="html"`` and ``encoding="unicode"``.

    NOTE(review): the serialized string is produced from the root object
    that ``parseFragment`` returns, so it presumably includes html5lib's
    fragment wrapper element -- confirm that callers expect this.
    """
    fragment = html5lib.parseFragment(
        html, treebuilder="etree", namespaceHTMLElements=False
    )
    # The XPath predicate only matches elements that have an href attribute,
    # so element.get("href") never yields None here.
    for element in fragment.iterfind(".//*[@href]"):
        element.set("href", urljoin(baseurl, element.get("href")))
    return ET.tostring(fragment, method="html", encoding="unicode")