From 43c008917b1c6519554ca5842d505509b30c359f Mon Sep 17 00:00:00 2001
From: "Tyler G. Hicks-Wright"
Date: Thu, 12 Dec 2013 09:11:34 -0700
Subject: [PATCH] First pass at the proxy.

---
 macify.py        | 20 ++++++++++++++++++++
 proxy.py         | 22 ++++++++++++++++++++++
 requirements.txt |  2 ++
 3 files changed, 44 insertions(+)
 create mode 100644 macify.py
 create mode 100644 proxy.py
 create mode 100644 requirements.txt

diff --git a/macify.py b/macify.py
new file mode 100644
index 0000000..1f2909a
--- /dev/null
+++ b/macify.py
@@ -0,0 +1,20 @@
+from bs4 import BeautifulSoup
+
+def macify(html):
+    soup = BeautifulSoup(html)
+    for tag in soup(['script', 'link', 'style', 'img', 'noscript']):
+        tag.extract()
+    for tag in soup(['div', 'span']):
+        tag.replaceWithChildren()
+    for tag in soup():
+        for attr in ['style', 'onclick']:
+            del tag[attr]
+    return str(soup)
+
+if __name__ == '__main__':
+    import requests
+    html = requests.get('http://stackoverflow.com/questions/5598524/can-i-remove-script-tags-with-beautifulsoup').content
+    html = macify(html)
+    with open('macified.html', 'w') as fd:
+        fd.write(html)
+
diff --git a/proxy.py b/proxy.py
new file mode 100644
index 0000000..39b2df3
--- /dev/null
+++ b/proxy.py
@@ -0,0 +1,22 @@
+from macify import macify
+import requests
+from flask import request, Flask
+
+app = Flask(__name__)
+session = requests.Session()
+
+@app.route('/', defaults={'path': ''}, methods=['GET'])
+@app.route('/<path:path>', methods=['GET'])
+def get(path):
+    resp = session.get(path, params=request.args)
+    return macify(resp.content), resp.status_code
+
+@app.route('/', defaults={'path': ''}, methods=['POST'])
+@app.route('/<path:path>', methods=['POST'])
+def post(path):
+    resp = session.post(path, data=request.form, allow_redirects=True)
+    return macify(resp.content), resp.status_code
+
+if __name__ == '__main__':
+    app.debug = True
+    app.run()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a50db1c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests
+flask