macproxy/macify.py

21 lines
610 B
Python
Raw Normal View History

2013-12-12 16:11:34 +00:00
from bs4 import BeautifulSoup
2013-12-12 16:13:58 +00:00
2013-12-12 16:11:34 +00:00
def macify(html, parser='html.parser'):
    """Return a simplified copy of an HTML document as a string.

    The document is parsed with BeautifulSoup and then reduced in three
    passes:

    1. ``script``, ``link``, ``style`` and ``noscript`` elements are
       removed together with everything inside them.
    2. ``div`` and ``span`` elements are unwrapped: the tag itself is
       dropped but its children stay in place.
    3. ``style`` and ``onclick`` attributes are removed from every
       remaining tag.

    Args:
        html: Markup to simplify, as ``str`` or ``bytes``.
        parser: Name of the BeautifulSoup parser to use. Defaults to the
            standard-library ``'html.parser'`` so that the result does not
            depend on which optional parsers happen to be installed.

    Returns:
        The simplified document serialised with ``str(soup)``.
    """
    # Naming the parser explicitly avoids bs4's "no parser was explicitly
    # specified" warning and keeps the parse tree reproducible.
    soup = BeautifulSoup(html, parser)

    # find_all() returns a materialised list, so removing or unwrapping
    # tags while looping over it is safe.
    for tag in soup.find_all(['script', 'link', 'style', 'noscript']):
        tag.extract()

    # unwrap() is the bs4 name for the legacy replaceWithChildren() alias.
    for tag in soup.find_all(['div', 'span']):
        tag.unwrap()

    for tag in soup.find_all(True):
        for attr in ('style', 'onclick'):
            # pop() with a default is a no-op when the attribute is absent.
            tag.attrs.pop(attr, None)

    return str(soup)
if __name__ == '__main__':
    # Manual smoke test: fetch a sample page, simplify it, and save the
    # result next to the script as macified.html.
    import requests

    # Without a timeout requests can block forever on a stalled server.
    response = requests.get(
        'http://stackoverflow.com/questions/5598524/can-i-remove-script-tags-with-beautifulsoup',
        timeout=30,
    )
    # Fail loudly instead of simplifying and saving an HTTP error page.
    response.raise_for_status()

    html = macify(response.content)

    # Write UTF-8 bytes explicitly so the output does not depend on the
    # platform's default text encoding. If macify() already returned a
    # byte string, it is written unchanged.
    data = html if isinstance(html, bytes) else html.encode('utf-8')
    with open('macified.html', 'wb') as fd:
        fd.write(data)