added historical versions to old directory

This commit is contained in:
Antoni Sawicki 2019-07-16 23:39:40 -07:00
parent 127114f753
commit a63d4ef50d
3 changed files with 1110 additions and 0 deletions

392
old/picidae.py Normal file
View File

@ -0,0 +1,392 @@
#!/usr/bin/env python
# picidae.py - makes screenshots of webpages
# and analyzes the webpage structure and writes image-maps of the links
# as well as forms that are placed on the exact position of the old form.
# It is a part of the art project www.picidae.net
# http://www.picidae.net
#
# This script is based on webkit2png from Paul Hammond.
# It was extended by picidae.net
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
__version__ = "1.0"
import sys
#print "hello ... "
try:
import Foundation
import WebKit
import AppKit
import objc
import urllib
except ImportError:
print "Cannot find pyobjc library files. Are you sure it is installed?"
sys.exit()
#try:
# from optparse import OptionParser
#except ImportError:
# print "OptionParser not imported"
# sys.exit()
from optparse import OptionParser
class AppDelegate(Foundation.NSObject):
    """Application delegate whose only job is to start the first page load."""

    def applicationDidFinishLaunching_(self, aNotification):
        # The first window's content view is the WebView installed by main();
        # its frame-load delegate drives the URL queue.
        application = aNotification.object()
        first_window = application.windows()[0]
        browser = first_window.contentView()
        load_delegate = browser.frameLoadDelegate()
        load_delegate.getURL(browser)
class WebkitLoad (Foundation.NSObject, WebKit.protocols.WebFrameLoadDelegate):
    """Frame-load delegate that screenshots pages and dumps their links and forms.

    main() assigns two attributes before the run loop starts:
      options -- the parsed optparse values (sizes, output flags, directory)
      urls    -- list of URLs still to fetch; a first entry of '-' means
                 "read one URL per line from stdin"

    NOTE(review): the helper methods are named without underscores, like the
    pre-existing Python-only helpers (getURL, captureView, ...), so that PyObjC
    does not read them as Objective-C selectors -- confirm against the PyObjC
    version in use.
    """

    # what happens if something goes wrong while loading
    def webView_didFailLoadWithError_forFrame_(self, webview, error, frame):
        print(" ... something went wrong 1")
        self.getURL(webview)

    def webView_didFailProvisionalLoadWithError_forFrame_(self, webview, error, frame):
        print(" ... something went wrong 2")
        self.getURL(webview)

    def makeFilename(self, URL, options):
        """Return the output path, without extension, for URL.

        The base name is options.filename if given, else the md5 hex digest
        of URL when options.md5 is set, else URL with every non-word
        character and a leading "http" removed.  options.datestamp prefixes
        the name with YYYYMMDD-.  The result is joined onto the absolute,
        user-expanded options.dir.
        """
        import os
        import re
        import time
        if options.filename:
            filename = options.filename
        elif options.md5:
            try:
                import md5
            except ImportError:
                print("--md5 requires python md5 library")
                AppKit.NSApplication.sharedApplication().terminate_(None)
                # Should terminate_ ever return, propagate the import failure
                # instead of tripping over the unbound module name below.
                raise
            filename = md5.new(URL).hexdigest()
        else:
            filename = re.sub(r'\W', '', URL)
            filename = re.sub(r'^http', '', filename)
        if options.datestamp:
            filename = time.strftime("%Y%m%d") + "-" + filename
        out_dir = os.path.abspath(os.path.expanduser(options.dir))
        return os.path.join(out_dir, filename)

    def saveImages(self, bitmapdata, filename, options):
        """Write the PNG variants selected in options next to `filename`.

        Writes filename.png (fullsize), filename-thumb.png (whole page scaled
        by options.scale) and filename-clipped.png (a clipwidth x clipheight
        cut-out of the scaled page), each only if its flag is set.
        """
        png = AppKit.NSPNGFileType
        # save the fullsize png
        if options.fullsize:
            bitmapdata.representationUsingType_properties_(
                png, None).writeToFile_atomically_(filename + ".png", objc.YES)
        if not (options.thumb or options.clipped):
            return
        # work out how big the thumbnail is
        thumbWidth = bitmapdata.pixelsWide() * options.scale
        thumbHeight = bitmapdata.pixelsHigh() * options.scale
        # make the thumbnails in a scratch image
        scratch = AppKit.NSImage.alloc().initWithSize_(
            Foundation.NSMakeSize(thumbWidth, thumbHeight))
        scratch.lockFocus()
        AppKit.NSGraphicsContext.currentContext().setImageInterpolation_(
            AppKit.NSImageInterpolationHigh)
        thumbRect = Foundation.NSMakeRect(0.0, 0.0, thumbWidth, thumbHeight)
        # The clip starts at y = thumbHeight - clipheight; presumably that is
        # the top of the page in AppKit's bottom-left-origin coordinates.
        clipRect = Foundation.NSMakeRect(
            0.0, thumbHeight - options.clipheight,
            options.clipwidth, options.clipheight)
        bitmapdata.drawInRect_(thumbRect)
        thumbOutput = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(thumbRect)
        clipOutput = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(clipRect)
        scratch.unlockFocus()
        # save the thumbnails as pngs
        if options.thumb:
            thumbOutput.representationUsingType_properties_(
                png, None).writeToFile_atomically_(filename + "-thumb.png", objc.YES)
        if options.clipped:
            clipOutput.representationUsingType_properties_(
                png, None).writeToFile_atomically_(filename + "-clipped.png", objc.YES)

    def getURL(self, webview):
        """Start loading the next queued URL, or terminate the app when done.

        With '-' at the head of self.urls one line is read from stdin per
        call; an empty line or EOF ends the run.  If the web view does not
        even begin a provisional load, "<nosuccess />" is printed and the
        next URL is tried.
        """
        if not self.urls:
            AppKit.NSApplication.sharedApplication().terminate_(None)
            # Never continue with an unbound url if terminate_ returns.
            return
        if self.urls[0] == '-':
            url = sys.stdin.readline().rstrip()
            if not url:
                AppKit.NSApplication.sharedApplication().terminate_(None)
                return
        else:
            url = self.urls.pop(0)
        self.resetWebview(webview)
        request = Foundation.NSURLRequest.requestWithURL_(
            Foundation.NSURL.URLWithString_(url))
        webview.mainFrame().loadRequest_(request)
        if not webview.mainFrame().provisionalDataSource():
            print("<nosuccess />")
            self.getURL(webview)

    def resetWebview(self, webview):
        """Shrink window and web view back to the initial browser size."""
        width = self.options.initWidth
        height = self.options.initHeight
        rect = Foundation.NSMakeRect(0, 0, width, height)
        webview.window().setContentSize_((width, height))
        webview.setFrame_(rect)

    def resizeWebview(self, view):
        """Grow the window to the document view's bounds so the full page is drawn."""
        view.window().display()
        view.window().setContentSize_(view.bounds().size)
        view.setFrame_(view.bounds())

    def captureView(self, view):
        """Return an NSBitmapImageRep holding the rendered contents of view."""
        view.lockFocus()
        try:
            # Keep the object returned by init...: the initialiser's result,
            # not the bare alloc() result, is the usable instance (same
            # chained form saveImages already uses).
            bitmapdata = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(
                view.bounds())
        finally:
            view.unlockFocus()
        return bitmapdata

    def writeTextFile(self, path, text):
        """Write text to path as UTF-8, closing the file even on error.

        The XML is assembled from values read out of the page and may contain
        non-ASCII characters, which a plain file write would reject.
        """
        import codecs
        out = codecs.open(path, 'w', 'utf-8')
        try:
            out.write(text)
        finally:
            out.close()

    def linkMapXML(self, frame):
        """Return the <map> markup: one <area> per 'A' element of the page.

        Each area carries the element's bounding box as coords and its href
        inside a CDATA section.  Lines end in a bare carriage return.
        """
        parts = ["<map name=\"map\">\r"]
        anchors = frame.DOMDocument().getElementsByTagName_('A')
        for index in range(anchors.length()):
            anchor = anchors.item_(index)
            href = anchor.valueForKey_('href')
            box = anchor.boundingBox()
            parts.append(
                "<area shape=\"rect\" coords=\"%i,%i,%i,%i\" alt=\"\"><![CDATA[%s]]></area>\r"
                % (Foundation.NSMinX(box), Foundation.NSMinY(box),
                   Foundation.NSMaxX(box), Foundation.NSMaxY(box), href))
        parts.append("</map>")
        return "".join(parts)

    def inputXML(self, field):
        """Return the <input> markup describing one form field.

        Non-hidden fields get an absolute-position style taken from their
        bounding box.  Field values are not emitted: the code that wrote them
        is disabled, so the CDATA section is always empty.
        """
        kind = field.valueForKey_('type')
        name = field.valueForKey_('name')
        size = field.valueForKey_('size')
        checked = field.valueForKey_('checked')
        parts = ["\t<input "]
        if kind:
            parts.append("type=\"%s\" " % (kind,))
        if name:
            parts.append("name=\"%s\" " % (name,))
        if size:
            parts.append("size=\"%s\" " % (size,))
        if kind and kind != "hidden":
            box = field.boundingBox()
            xmin = Foundation.NSMinX(box)
            ymin = Foundation.NSMinY(box)
            width = Foundation.NSMaxX(box) - xmin
            height = Foundation.NSMaxY(box) - ymin
            if kind == "radio" or kind == "checkbox":
                # nudge small controls 3px up and left; size is unaffected
                xmin -= 3
                ymin -= 3
            parts.append(
                "style=\"position:absolute;top:%i;left:%i;width:%i;height:%i;\" "
                % (ymin, xmin, width, height))
        if checked:
            parts.append("checked=\"%s\" " % (checked,))
        parts.append("><![CDATA[")
        parts.append("]]></input>\r")
        return "".join(parts)

    def formsXML(self, frame):
        """Return the <forms> markup: page URL plus every form and its inputs."""
        page_url = frame.dataSource().request().URL().absoluteString()
        parts = ["<forms>\r", "<page><![CDATA[", page_url, "]]></page>\r"]
        forms = frame.DOMDocument().getElementsByTagName_('form')
        for form_index in range(forms.length()):
            form = forms.item_(form_index)
            action = form.valueForKey_('action')
            method = form.valueForKey_('method')
            parts.append(
                "<form method=\"%s\" ><action><![CDATA[%s]]></action>\r"
                % (method, action))
            fields = form.getElementsByTagName_('input')
            for field_index in range(fields.length()):
                parts.append(self.inputXML(fields.item_(field_index)))
            parts.append("</form>\r")
        parts.append("</forms>")
        return "".join(parts)

    # what happens when the page has finished loading
    def webView_didFinishLoadForFrame_(self, webview, frame):
        """Screenshot the finished page, write its XML side files, fetch the next URL.

        Output: the PNGs chosen in self.options, <name>.xml (link image map)
        and <name>.form.xml (forms); the final page URL is printed between
        <page> and </page> lines on stdout.
        """
        # don't care about subframes
        if not (frame == webview.mainFrame()):
            return
        view = frame.frameView().documentView()
        self.resizeWebview(view)
        URL = frame.dataSource().initialRequest().URL().absoluteString()
        filename = self.makeFilename(URL, self.options)
        bitmapdata = self.captureView(view)
        self.saveImages(bitmapdata, filename, self.options)
        # ----------------------------------
        # picidae: report the URL that was actually loaded (request(), not
        # initialRequest())
        print("<page>")
        print(frame.dataSource().request().URL().absoluteString())
        print("</page>")
        # analyse HTML and get links
        self.writeTextFile(filename + '.xml', self.linkMapXML(frame))
        # get forms
        self.writeTextFile(filename + '.form.xml', self.formsXML(frame))
        # End picidae
        # ----------------------------------
        self.getURL(webview)
def main():
    """Parse the command line, build an off-screen WebView and run the app.

    Prints the help text and returns when no URL is given, and prints a
    message and returns when --filename is combined with anything but exactly
    one real URL.  Otherwise a borderless window placed at (-16000, -16000)
    hosts the WebView, a WebkitLoad delegate receives the options and URL
    list, and the AppKit run loop is entered.
    """
    # parse the command line
    usage = """%prog [options] [http://example.net/ ...]
examples:
%prog http://google.com/ # screengrab google
%prog -W 1000 -H 1000 http://google.com/ # bigger screengrab of google
%prog -T http://google.com/ # just the thumbnail screengrab
%prog -TF http://google.com/ # just thumbnail and fullsize grab
%prog -o foo http://google.com/ # save images as "foo-thumb.png" etc
%prog - # screengrab urls from stdin"""
    cmdparser = OptionParser(usage, version=("webkit2png "+__version__))
    # TODO: add quiet/verbose options
    cmdparser.add_option("-W", "--width", type="float", default=800.0,
                         help="initial (and minimum) width of browser (default: 800)")
    cmdparser.add_option("-H", "--height", type="float", default=600.0,
                         help="initial (and minimum) height of browser (default: 600)")
    cmdparser.add_option("--clipwidth", type="float", default=200.0,
                         help="width of clipped thumbnail (default: 200)",
                         metavar="WIDTH")
    cmdparser.add_option("--clipheight", type="float", default=150.0,
                         help="height of clipped thumbnail (default: 150)",
                         metavar="HEIGHT")
    cmdparser.add_option("-s", "--scale", type="float", default=0.25,
                         help="scale factor for thumbnails (default: 0.25)")
    cmdparser.add_option("-m", "--md5", action="store_true",
                         help="use md5 hash for filename (like del.icio.us)")
    cmdparser.add_option("-o", "--filename", type="string", default="",
                         metavar="NAME", help="save images as NAME.png,NAME-thumb.png etc")
    cmdparser.add_option("-F", "--fullsize", action="store_true",
                         help="only create fullsize screenshot")
    cmdparser.add_option("-T", "--thumb", action="store_true",
                         help="only create thumbnail sreenshot")
    cmdparser.add_option("-C", "--clipped", action="store_true",
                         help="only create clipped thumbnail screenshot")
    cmdparser.add_option("-d", "--datestamp", action="store_true",
                         help="include date in filename")
    cmdparser.add_option("-D", "--dir", type="string", default="./",
                         help="directory to place images into")
    (options, args) = cmdparser.parse_args()
    if not args:
        cmdparser.print_help()
        return
    if options.filename and (len(args) != 1 or args[0] == "-"):
        print("--filename option requires exactly one url")
        return
    if options.scale == 0:
        cmdparser.error("scale cannot be zero")
    # make sure we're outputting something
    if not (options.fullsize or options.thumb or options.clipped):
        options.fullsize = True
        options.thumb = True
        options.clipped = True
    # work out the initial size of the browser window
    # (this might need to be larger so clipped image is right size)
    options.initWidth = (options.clipwidth / options.scale)
    options.initHeight = (options.clipheight / options.scale)
    if options.width > options.initWidth:
        options.initWidth = options.width
    if options.height > options.initHeight:
        options.initHeight = options.height
    app = AppKit.NSApplication.sharedApplication()
    # create an app delegate
    delegate = AppDelegate.alloc().init()
    AppKit.NSApp().setDelegate_(delegate)
    # create a window; alloc() and init...() are chained so that the object
    # returned by the initialiser is the one that gets used
    rect = Foundation.NSMakeRect(-16000, -16000, 100, 100)
    win = AppKit.NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
        rect, AppKit.NSBorderlessWindowMask, 2, 0)
    # create a webview object
    webview = WebKit.WebView.alloc().initWithFrame_(rect)
    # turn off scrolling so the content is actually x wide and not x-15
    webview.mainFrame().frameView().setAllowsScrolling_(objc.NO)
    # add the webview to the window
    win.setContentView_(webview)
    # create a LoadDelegate
    loaddelegate = WebkitLoad.alloc().init()
    loaddelegate.options = options
    loaddelegate.urls = args
    webview.setFrameLoadDelegate_(loaddelegate)
    app.run()


if __name__ == '__main__':
    main()

506
old/webkit2png.py Normal file
View File

@ -0,0 +1,506 @@
#!/usr/bin/python
# webkit2png - makes screenshots of web pages
# http://www.paulhammond.org/webkit2png
__version__ = "dev"
# Copyright (c) 2004-2013 Paul Hammond
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
import sys
import optparse
import re
import os
try:
import Foundation
import WebKit
import AppKit
import Quartz
import objc
except ImportError:
print "Cannot find pyobjc library files. Are you sure it is installed?"
sys.exit()
class AppDelegate(Foundation.NSObject):
    """Application delegate: starts the first load and arms the global timeout.

    main() stores the timeout in seconds on the instance as `timeout`.
    """

    def applicationDidFinishLaunching_(self, aNotification):
        # The first window's content view is the WebView installed by main().
        application = aNotification.object()
        browser = application.windows()[0].contentView()
        load_delegate = browser.frameLoadDelegate()
        load_delegate.getURL(browser)
        # Schedule timeout_ to fire once the configured delay has elapsed.
        delay = self.timeout
        self.performSelector_withObject_afterDelay_("timeout:", None, delay)

    def timeout_(self, obj):
        # Reached only if the run outlives the delay: log it and quit the app.
        Foundation.NSLog("timed out!")
        application = AppKit.NSApplication.sharedApplication()
        application.terminate_(None)
class Webkit2PngScriptBridge(Foundation.NSObject):
    """Object exposed to page JavaScript as `webkit2png` to defer the capture.

    A page calls stop() before asynchronous work and start() afterwards.
      is_stopped     -- True between stop() and start()
      start_callback -- callable to run on start(), or False while none is
                        registered
    """

    def init(self):
        self = super(Webkit2PngScriptBridge, self).init()
        if self is None:
            # the superclass initialiser failed; nothing to set up
            return None
        self.is_stopped = False
        self.start_callback = False
        return self

    def stop(self):
        """Ask for the screenshot to be held back until start() is called."""
        self.is_stopped = True

    def start(self):
        """Clear the stop flag and run the pending capture callback, if any.

        A page may call start() before any callback has been registered (the
        attribute is then still False); in that case only the flag is reset.
        """
        self.is_stopped = False
        callback = self.start_callback
        if callback:
            callback()

    def isSelectorExcludedFromWebScript_(self, sel):
        # Only stop and start are scriptable; every other selector is hidden.
        return sel not in ('stop', 'start')
class WebkitLoad (Foundation.NSObject, WebKit.protocols.WebFrameLoadDelegate):
    """Frame-load delegate that walks the URL queue and saves screenshots.

    main() assigns two attributes before the run loop starts:
      options -- the parsed optparse values
      urls    -- list of URLs/paths still to fetch; a first entry of '-'
                 means "read one URL per line from stdin"

    NOTE(review): the added helper is named without underscores, like the
    pre-existing Python-only helpers (getURL, captureView, ...), so that PyObjC
    does not read it as an Objective-C selector -- confirm against the PyObjC
    version in use.
    """

    def reportLoadError(self, webview, error):
        """Shared failure path: ignore cancellations, else report and move on."""
        if error.code() == Foundation.NSURLErrorCancelled:
            return
        print(" ... something went wrong: "+error.localizedDescription())
        self.getURL(webview)

    # what happens if something goes wrong while loading
    def webView_didFailLoadWithError_forFrame_(self, webview, error, frame):
        self.reportLoadError(webview, error)

    def webView_didFailProvisionalLoadWithError_forFrame_(self, webview, error,
                                                          frame):
        self.reportLoadError(webview, error)

    def makeFilename(self, URL, options):
        """Return the output path, without suffix, for URL.

        The base name is options.filename if given, else the md5 hex digest
        of URL when options.md5 is set, else URL with a leading http/https
        and every non-word character removed.  options.datestamp prefixes
        YYYYMMDD-.  The output directory (options.dir, user-expanded and made
        absolute) is created when it does not exist yet.
        """
        if options.filename:
            filename = options.filename
        elif options.md5:
            try:
                import md5
            except ImportError:
                print("--md5 requires python md5 library")
                AppKit.NSApplication.sharedApplication().terminate_(None)
                # Should terminate_ ever return, propagate the import failure
                # instead of tripping over the unbound module name below.
                raise
            filename = md5.new(URL).hexdigest()
        else:
            filename = re.sub(r'^https?', '', URL)
            filename = re.sub(r'\W', '', filename)
        if options.datestamp:
            import time
            filename = time.strftime("%Y%m%d") + "-" + filename
        out_dir = os.path.abspath(os.path.expanduser(options.dir))
        # Test the expanded path -- the same one that gets created.  Testing
        # the raw option made "~/x" look missing forever, so makedirs raised
        # on every run after the first.
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        return os.path.join(out_dir, filename)

    def saveImages(self, bitmapdata, filename, options):
        """Write the PNG variants selected in options next to `filename`.

        Writes filename-full.png, filename-thumb.png (whole page scaled by
        options.scale) and filename-clipped.png (a clipwidth x clipheight
        cut-out of the scaled page), each only if its flag is set.
        """
        png = AppKit.NSPNGFileType
        # save the fullsize png
        if options.fullsize:
            bitmapdata.representationUsingType_properties_(
                png, None).writeToFile_atomically_(filename + "-full.png",
                                                   objc.YES)
        if not (options.thumb or options.clipped):
            return
        # work out how big the thumbnail is
        thumbWidth = bitmapdata.pixelsWide() * options.scale
        thumbHeight = bitmapdata.pixelsHigh() * options.scale
        # make the thumbnails in a scratch image
        scratch = AppKit.NSImage.alloc().initWithSize_(
            Foundation.NSMakeSize(thumbWidth, thumbHeight))
        scratch.lockFocus()
        AppKit.NSGraphicsContext.currentContext().setImageInterpolation_(
            AppKit.NSImageInterpolationHigh)
        thumbRect = Foundation.NSMakeRect(0.0, 0.0, thumbWidth, thumbHeight)
        # The clip starts at y = thumbHeight - clipheight; presumably that is
        # the top of the page in AppKit's bottom-left-origin coordinates.
        clipRect = Foundation.NSMakeRect(
            0.0, thumbHeight - options.clipheight,
            options.clipwidth, options.clipheight)
        bitmapdata.drawInRect_(thumbRect)
        thumbOutput = AppKit.NSBitmapImageRep.alloc()\
            .initWithFocusedViewRect_(thumbRect)
        clipOutput = AppKit.NSBitmapImageRep.alloc()\
            .initWithFocusedViewRect_(clipRect)
        scratch.unlockFocus()
        # save the thumbnails as pngs
        if options.thumb:
            thumbOutput.representationUsingType_properties_(
                png, None).writeToFile_atomically_(
                    filename + "-thumb.png", objc.YES)
        if options.clipped:
            clipOutput.representationUsingType_properties_(
                png, None).writeToFile_atomically_(
                    filename + "-clipped.png", objc.YES)

    def getURL(self, webview):
        """Start loading the next queued URL, or terminate the app when done.

        With '-' at the head of self.urls one line is read from stdin per
        call; an empty line or EOF ends the run.  Input without a URL scheme
        is treated as a local file path.  A fresh script bridge is installed
        as `webkit2png` before every load.
        """
        if not self.urls:
            AppKit.NSApplication.sharedApplication().terminate_(None)
            # Never continue with an unbound url if terminate_ returns.
            return
        if self.urls[0] == '-':
            url = sys.stdin.readline().rstrip()
            if not url:
                AppKit.NSApplication.sharedApplication().terminate_(None)
                return
        else:
            url = self.urls.pop(0)
        nsurl = Foundation.NSURL.URLWithString_(url)
        if not (nsurl and nsurl.scheme()):
            nsurl = Foundation.NSURL.alloc().initFileURLWithPath_(url)
        nsurl = nsurl.absoluteURL()
        if self.options.ignore_ssl_check:
            Foundation.NSURLRequest.setAllowsAnyHTTPSCertificate_forHost_(
                objc.YES, nsurl.host())
        print("Fetching %s ..." % (nsurl,))
        self.resetWebview(webview)
        scriptobject = webview.windowScriptObject()
        scriptobject.setValue_forKey_(Webkit2PngScriptBridge.alloc().init(),
                                      'webkit2png')
        webview.mainFrame().loadRequest_(
            Foundation.NSURLRequest.requestWithURL_(nsurl))
        if not webview.mainFrame().provisionalDataSource():
            print(" ... not a proper url?")
            self.getURL(webview)

    def resetWebview(self, webview):
        """Reset window and web view to the initial size (and transparency)."""
        width = self.options.initWidth
        height = self.options.initHeight
        rect = Foundation.NSMakeRect(0, 0, width, height)
        window = webview.window()
        window.setContentSize_((width, height))
        if self.options.transparent:
            window.setOpaque_(objc.NO)
            window.setBackgroundColor_(AppKit.NSColor.clearColor())
            webview.setDrawsBackground_(objc.NO)
        webview.setFrame_(rect)

    def captureView(self, view):
        """Return an NSBitmapImageRep of view, clipped to the UNSAFE_max_* limits."""
        bounds = view.bounds()
        max_height = self.options.UNSAFE_max_height
        max_width = self.options.UNSAFE_max_width
        if bounds.size.height > max_height:
            sys.stderr.write("Error: page height greater than %s, "
                             "clipping to avoid crashing windowserver.\n"
                             % max_height)
            bounds.size.height = max_height
        if bounds.size.width > max_width:
            sys.stderr.write("Error: page width greater than %s, "
                             "clipping to avoid crashing windowserver.\n"
                             % max_width)
            bounds.size.width = max_width
        view.window().display()
        view.window().setContentSize_(
            Foundation.NSSize(self.options.initWidth, self.options.initHeight))
        view.setFrame_(bounds)
        if hasattr(view, "bitmapImageRepForCachingDisplayInRect_"):
            bitmapdata = view.bitmapImageRepForCachingDisplayInRect_(bounds)
            view.cacheDisplayInRect_toBitmapImageRep_(bounds, bitmapdata)
        else:
            # Fallback for views without the caching API.  alloc() and
            # init...() are chained so the initialiser's result is used.
            view.lockFocus()
            try:
                bitmapdata = AppKit.NSBitmapImageRep.alloc()\
                    .initWithFocusedViewRect_(bounds)
            finally:
                view.unlockFocus()
        return bitmapdata

    # what happens when the page has finished loading
    def webView_didFinishLoadForFrame_(self, webview, frame):
        """Run the optional --js snippet, then schedule the grab.

        If the page called webkit2png.stop(), the grab is parked on the
        script bridge until the page calls webkit2png.start().
        """
        # don't care about subframes
        if not (frame == webview.mainFrame()):
            return
        scriptobject = webview.windowScriptObject()
        if self.options.js:
            scriptobject.evaluateWebScript_(self.options.js)
        bridge = scriptobject.valueForKey_('webkit2png')

        def scheduleGrab():
            # one-shot timer: waits options.delay, then calls doGrab with the
            # webview as user info
            Foundation.NSTimer.\
                scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
                    self.options.delay, self, self.doGrab, webview, False)

        if bridge.is_stopped:
            bridge.start_callback = scheduleGrab
        else:
            scheduleGrab()

    def doGrab(self, timer):
        """Capture the page (optionally cropped to --selector), save it, go on."""
        webview = timer.userInfo()
        frame = webview.mainFrame()
        view = frame.frameView().documentView()
        URL = webview.mainFrame().dataSource().initialRequest().URL()\
            .absoluteString()
        filename = self.makeFilename(URL, self.options)
        bitmapdata = self.captureView(view)
        if self.options.selector:
            doc = frame.DOMDocument()
            el = doc.querySelector_(self.options.selector)
            if not el:
                print(" ... no element matching %s found?"
                      % (self.options.selector,))
                self.getURL(webview)
                return
            # sum the offsets up the offsetParent chain to get the element's
            # position within the page
            left, top = 0, 0
            parent = el
            while parent:
                left += parent.offsetLeft()
                top += parent.offsetTop()
                parent = parent.offsetParent()
            zoom = self.options.zoom
            cropRect = view.window().convertRectToBacking_(
                Foundation.NSMakeRect(
                    zoom * left, zoom * top,
                    zoom * el.offsetWidth(), zoom * el.offsetHeight()))
            cropped = Quartz.CGImageCreateWithImageInRect(
                bitmapdata.CGImage(), cropRect)
            bitmapdata = AppKit.NSBitmapImageRep.alloc().initWithCGImage_(
                cropped)
            Quartz.CGImageRelease(cropped)
        self.saveImages(bitmapdata, filename, self.options)
        print(" ... done")
        self.getURL(webview)
def main():
    """Parse the command line, build the WebView and run the app.

    Prints the usage line and returns when no URL is given, and prints a
    message and returns when --filename is combined with anything but exactly
    one real URL.  Otherwise a borderless window hosts a WebView configured
    from the options, a WebkitLoad delegate receives the options and URL
    list, and the AppKit run loop is entered.
    """
    # parse the command line
    usage = """%prog [options] [http://example.net/ ...]
Examples:
%prog http://google.com/ # screengrab google
%prog -W 1000 -H 1000 http://google.com/ # bigger screengrab of google
%prog -T http://google.com/ # just the thumbnail screengrab
%prog -TF http://google.com/ # just thumbnail and fullsize grab
%prog -o foo http://google.com/ # save images as "foo-thumb.png" etc
%prog - # screengrab urls from stdin
%prog /path/to/file.html # screengrab local html file
%prog -h | less # full documentation"""
    cmdparser = optparse.OptionParser(usage,
                                      version=("webkit2png " + __version__))
    # TODO: add quiet/verbose options
    cmdparser.add_option("--debug", action="store_true",
                         help=optparse.SUPPRESS_HELP)
    # warning: setting these too high can crash your window server
    cmdparser.add_option("--UNSAFE-max-height", type="int", default=30000,
                         help=optparse.SUPPRESS_HELP)
    cmdparser.add_option("--UNSAFE-max-width", type="int", default=30000,
                         help=optparse.SUPPRESS_HELP)

    group = optparse.OptionGroup(cmdparser, "Network Options")
    group.add_option("--timeout", type="float", default=60.0,
                     help="page load timeout (default: 60)")
    group.add_option("--user-agent", type="string", default=False,
                     help="set user agent header")
    group.add_option("--ignore-ssl-check", action="store_true", default=False,
                     help="ignore SSL Certificate name mismatches")
    cmdparser.add_option_group(group)

    group = optparse.OptionGroup(cmdparser, "Browser Window Options")
    group.add_option(
        "-W", "--width", type="float", default=800.0,
        help="initial (and minimum) width of browser (default: 800)")
    group.add_option(
        "-H", "--height", type="float", default=600.0,
        help="initial (and minimum) height of browser (default: 600)")
    group.add_option(
        "-z", "--zoom", type="float", default=1.0,
        help='zoom level of browser, equivalent to "Zoom In" and "Zoom Out" '
             'in "View" menu (default: 1.0)')
    group.add_option(
        "--selector", type="string",
        help="CSS selector for a single element to capture (first matching "
             "element will be used)")
    cmdparser.add_option_group(group)

    group = optparse.OptionGroup(cmdparser, "Output size options")
    group.add_option(
        "-F", "--fullsize", action="store_true",
        help="only create fullsize screenshot")
    group.add_option(
        "-T", "--thumb", action="store_true",
        help="only create thumbnail sreenshot")
    group.add_option(
        "-C", "--clipped", action="store_true",
        help="only create clipped thumbnail screenshot")
    group.add_option(
        "--clipwidth", type="float", default=200.0,
        help="width of clipped thumbnail (default: 200)",
        metavar="WIDTH")
    group.add_option(
        "--clipheight", type="float", default=150.0,
        help="height of clipped thumbnail (default: 150)",
        metavar="HEIGHT")
    group.add_option(
        "-s", "--scale", type="float", default=0.25,
        help="scale factor for thumbnails (default: 0.25)")
    cmdparser.add_option_group(group)

    group = optparse.OptionGroup(cmdparser, "Output filename options")
    group.add_option(
        "-D", "--dir", type="string", default="./",
        help="directory to place images into")
    group.add_option(
        "-o", "--filename", type="string", default="",
        metavar="NAME", help="save images as NAME-full.png,NAME-thumb.png etc")
    group.add_option(
        "-m", "--md5", action="store_true",
        help="use md5 hash for filename (like del.icio.us)")
    group.add_option(
        "-d", "--datestamp", action="store_true",
        help="include date in filename")
    cmdparser.add_option_group(group)

    group = optparse.OptionGroup(cmdparser, "Web page functionality")
    group.add_option(
        "--delay", type="float", default=0,
        help="delay between page load finishing and screenshot")
    group.add_option(
        "--js", type="string", default=None,
        help="JavaScript to execute when the window finishes loading"
             "(example: --js='document.bgColor=\"red\";'). "
             "If you need to wait for asynchronous code to finish before "
             "capturing the screenshot, call webkit2png.stop() before the "
             "async code runs, then webkit2png.start() to capture the image.")
    group.add_option(
        "--noimages", action="store_true",
        help=optparse.SUPPRESS_HELP)
    group.add_option(
        "--no-images", action="store_true",
        help="don't load images")
    group.add_option(
        "--nojs", action="store_true",
        help=optparse.SUPPRESS_HELP)
    group.add_option(
        "--no-js", action="store_true",
        help="disable JavaScript support")
    group.add_option(
        "--transparent", action="store_true",
        help="render output on a transparent background (requires a web "
             "page with a transparent background)", default=False)
    cmdparser.add_option_group(group)

    (options, args) = cmdparser.parse_args()
    if not args:
        cmdparser.print_usage()
        return
    if options.filename and (len(args) != 1 or args[0] == "-"):
        print("--filename option requires exactly one url")
        return

    # deprecated options: warn on stderr and map onto the new spelling
    if options.nojs:
        sys.stderr.write('Warning: --nojs will be removed in '
                         'webkit2png 1.0. Please use --no-js.\n')
        options.no_js = True
    if options.noimages:
        sys.stderr.write('Warning: --noimages will be removed in '
                         'webkit2png 1.0. Please use --no-images.\n')
        options.no_images = True

    if options.scale == 0:
        cmdparser.error("scale cannot be zero")
    # make sure we're outputting something
    if not (options.fullsize or options.thumb or options.clipped):
        options.fullsize = True
        options.thumb = True
        options.clipped = True
    # work out the initial size of the browser window
    # (this might need to be larger so clipped image is right size)
    options.initWidth = (options.clipwidth / options.scale)
    options.initHeight = (options.clipheight / options.scale)
    # NOTE(review): only the width is multiplied by the zoom factor, the
    # height is not -- confirm this asymmetry is intended.
    options.width *= options.zoom
    if options.width > options.initWidth:
        options.initWidth = options.width
    if options.height > options.initHeight:
        options.initHeight = options.height

    # Hide the dock icon (needs to run before NSApplication.sharedApplication)
    AppKit.NSBundle.mainBundle().infoDictionary()['LSBackgroundOnly'] = '1'
    app = AppKit.NSApplication.sharedApplication()
    # create an app delegate
    delegate = AppDelegate.alloc().init()
    delegate.timeout = options.timeout
    AppKit.NSApp().setDelegate_(delegate)
    # create a window; alloc() and init...() are chained so that the object
    # returned by the initialiser is the one that gets used
    rect = Foundation.NSMakeRect(0, 0, 100, 100)
    win = AppKit.NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
        rect, AppKit.NSBorderlessWindowMask, 2, 0)
    if options.debug:
        win.orderFrontRegardless()
    # create a webview object
    webview = WebKit.WebView.alloc().initWithFrame_(rect)
    # turn off scrolling so the content is actually x wide and not x-15
    webview.mainFrame().frameView().setAllowsScrolling_(objc.NO)
    if options.user_agent:
        webview.setCustomUserAgent_(options.user_agent)
    else:
        # build the default agent string from the installed WebKit's bundle
        # version with its first character dropped
        webkit_version = Foundation.NSBundle.bundleForClass_(WebKit.WebView)\
            .objectForInfoDictionaryKey_(WebKit.kCFBundleVersionKey)[1:]
        webview.setApplicationNameForUserAgent_(
            "Like-Version/6.0 Safari/%s webkit2png/%s"
            % (webkit_version, __version__))
    webview.setPreferencesIdentifier_('webkit2png')
    webview.preferences().setLoadsImagesAutomatically_(not options.no_images)
    webview.preferences().setJavaScriptEnabled_(not options.no_js)
    if options.zoom != 1.0:
        webview._setZoomMultiplier_isTextOnly_(options.zoom, False)
    # add the webview to the window
    win.setContentView_(webview)
    # create a LoadDelegate
    loaddelegate = WebkitLoad.alloc().init()
    loaddelegate.options = options
    loaddelegate.urls = args
    webview.setFrameLoadDelegate_(loaddelegate)
    app.run()


if __name__ == '__main__':
    main()

212
old/webrender.py Normal file
View File

@ -0,0 +1,212 @@
#!/usr/bin/env python
# webrender.py - recursively render web pages to a gif+imagemap of clickable links
# caveat: this script must be run as a regular user and cannot run as a daemon
# from apache cgi-bin; you can use python's built-in http server instead
# usage:
# create cgi-bin directory, copy webrender.py to cgi-bin and chmod 755
# python -m CGIHTTPServer 8000
# navigate web browser to http://x.x.x.x:8000/cgi-bin/webrender.py
# the webrender-xxx.gif images are created in the CWD of the http server
__version__ = "1.0"
#
# This program is based on the software picidae.py 1.0 from http://www.picidae.net
# It was modified by Antoni Sawicki
#
# This program is based on the software webkit2png 0.4 from Paul Hammond.
# It was extended by picidae.net
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
try:
import sys
import os
import glob
import random
import Foundation
import WebKit
import AppKit
import objc
import string
import urllib
import socket
import cgi
import cgitb; cgitb.enable() # for trubleshooting
except ImportError:
print "Cannot find pyobjc library files. Are you sure it is installed?"
sys.exit()
from optparse import OptionParser
class AppDelegate(Foundation.NSObject):
    """Application delegate that triggers the single page load at launch."""

    def applicationDidFinishLaunching_(self, aNotification):
        # The first window's content view is the WebView installed by main();
        # hand it to its frame-load delegate to begin fetching.
        windows = aNotification.object().windows()
        browser = windows[0].contentView()
        loader = browser.frameLoadDelegate()
        loader.getURL(browser)
class WebkitLoad (Foundation.NSObject, WebKit.protocols.WebFrameLoadDelegate):
    """Frame-load delegate that renders one page to a GIF plus HTML image map.

    main() assigns `urls` (the list of URLs still to fetch) before the run
    loop starts.  The generated HTML page is printed to stdout as the CGI
    response.
    """

    # what happens if something goes wrong while loading
    def webView_didFailLoadWithError_forFrame_(self, webview, error, frame):
        print(" ... something went wrong 1: " + error.localizedDescription())
        self.getURL(webview)

    def webView_didFailProvisionalLoadWithError_forFrame_(self, webview, error, frame):
        print(" ... something went wrong 2: " + error.localizedDescription())
        self.getURL(webview)

    def getURL(self, webview):
        """Start loading the next queued URL, or terminate the app when done.

        With '-' at the head of self.urls one line is read from stdin per
        call; an empty line or EOF ends the run.
        """
        if not self.urls:
            AppKit.NSApplication.sharedApplication().terminate_(None)
            # Never continue with an unbound url if terminate_ returns.
            return
        if self.urls[0] == '-':
            url = sys.stdin.readline().rstrip()
            if not url:
                AppKit.NSApplication.sharedApplication().terminate_(None)
                return
        else:
            url = self.urls.pop(0)
        self.resetWebview(webview)
        request = Foundation.NSURLRequest.requestWithURL_(
            Foundation.NSURL.URLWithString_(url))
        webview.mainFrame().loadRequest_(request)
        if not webview.mainFrame().provisionalDataSource():
            print("<nosuccess />")
            self.getURL(webview)

    def resetWebview(self, webview):
        """Reset window and web view to the fixed 1024x768 starting size."""
        rect = Foundation.NSMakeRect(0, 0, 1024, 768)
        webview.window().setContentSize_((1024, 768))
        webview.setFrame_(rect)

    def resizeWebview(self, view):
        """Grow the window to the document view's bounds so the full page is drawn."""
        view.window().display()
        view.window().setContentSize_(view.bounds().size)
        view.setFrame_(view.bounds())

    def captureView(self, view):
        """Return an NSBitmapImageRep holding the rendered contents of view."""
        view.lockFocus()
        try:
            # alloc() and init...() are chained so that the object returned
            # by the initialiser is the one that gets used.
            bitmapdata = AppKit.NSBitmapImageRep.alloc().initWithFocusedViewRect_(
                view.bounds())
        finally:
            view.unlockFocus()
        return bitmapdata

    # what happens when the page has finished loading
    def webView_didFinishLoadForFrame_(self, webview, frame):
        """Write the page as webrender-N.gif and print the HTML wrapper page.

        Existing webrender-*.gif files in the working directory are deleted
        first.  Every link of the page becomes an <area> that points back at
        this script with the target in the url parameter ('?' and '&' are
        swapped for the TNXQUE / TNXAMP tokens that main() decodes).

        The page URL and the hrefs come from the remote page, so they are
        HTML-escaped before being placed into the markup.
        """
        # don't care about subframes
        if not (frame == webview.mainFrame()):
            return
        view = frame.frameView().documentView()
        self.resizeWebview(view)
        URL = frame.dataSource().initialRequest().URL().absoluteString()
        for stale in glob.glob("webrender-*.gif"):
            os.remove(stale)
        GIF = "webrender-%s.gif" % (random.randrange(0, 1000))
        bitmapdata = self.captureView(view)
        bitmapdata.representationUsingType_properties_(
            AppKit.NSGIFFileType, None).writeToFile_atomically_(GIF, objc.YES)
        myurl = "http://%s:%s%s" % (
            socket.gethostbyname(socket.gethostname()),
            os.getenv("SERVER_PORT"), os.getenv("SCRIPT_NAME"))
        # escaped forms for use inside attribute values and element text
        safe_url = cgi.escape(URL, True)
        safe_myurl = cgi.escape(myurl, True)
        print("Content-type: text/html\r\n\r\n")
        print("<!-- webrender.py by Antoni Sawicki -->")
        print("<html><head><title>Webrender - %s</title></head><body><table border=\"0\"><tr>" % (safe_url,))
        print("<td><form action=\"%s\">" % (safe_myurl,))
        print("<input type=\"text\" name=\"url\" value=\"%s\" size=\"80\">" % (safe_url,))
        print("<input type=\"submit\" value=\"go\">")
        print("</form></td><td>")
        print("<form action=\"%s\">" % (safe_myurl,))
        print("<input type=\"text\" name=\"search\" value=\"\" size=\"20\">")
        print("<input type=\"submit\" value=\"search\">")
        print("</form></td></tr></table>")
        print("<img src=\"../%s\" alt=\"webrender\" usemap=\"#map\" border=\"0\">" % (GIF,))
        # Analyse HTML and get links
        print("<map name=\"map\">")
        anchors = frame.DOMDocument().getElementsByTagName_('A')
        for index in range(anchors.length()):
            anchor = anchors.item_(index)
            # treat a missing href as empty rather than crashing on None
            value = anchor.valueForKey_('href') or ""
            box = anchor.boundingBox()
            escval = value.replace("?", "TNXQUE").replace("&", "TNXAMP")
            target = cgi.escape("%s?url=%s" % (myurl, escval), True)
            print("<area shape=\"rect\" coords=\"%i,%i,%i,%i\" alt=\"\" href=\"%s\"></area>"
                  % (Foundation.NSMinX(box), Foundation.NSMinY(box),
                     Foundation.NSMaxX(box), Foundation.NSMaxY(box), target))
        print("</map>")
        print("</body></html>")
        self.getURL(webview)
def main():
    """CGI entry point: work out the target URL, then render it off-screen.

    The `search` form field, when present, becomes a Google search URL;
    otherwise the `url` field is used (default http://www.google.com) after
    turning the TNXAMP / TNXQUE tokens back into '&' and '?'.
    """
    # obtain url from cgi input
    form = cgi.FieldStorage()
    rawurl = form.getfirst("url", "http://www.google.com")
    rawsearch = form.getfirst("search")
    if rawsearch:
        # URL-encode the search term: spaces, '&' or '#' typed by the user
        # would otherwise corrupt or truncate the query string.
        url = "http://www.google.com/search?q=%s" % (urllib.quote_plus(rawsearch),)
    else:
        url = rawurl.replace("TNXAMP", "&").replace("TNXQUE", "?")
    AppKit.NSApplicationLoad()
    app = AppKit.NSApplication.sharedApplication()
    # create an app delegate
    delegate = AppDelegate.alloc().init()
    AppKit.NSApp().setDelegate_(delegate)
    # create a window; alloc() and init...() are chained so that the object
    # returned by the initialiser is the one that gets used
    rect = Foundation.NSMakeRect(-16000, -16000, 100, 100)
    win = AppKit.NSWindow.alloc().initWithContentRect_styleMask_backing_defer_(
        rect, AppKit.NSBorderlessWindowMask, 2, 0)
    # create a webview object
    webview = WebKit.WebView.alloc().initWithFrame_(rect)
    # turn off scrolling so the content is actually x wide and not x-15
    webview.mainFrame().frameView().setAllowsScrolling_(objc.NO)
    # add the webview to the window
    win.setContentView_(webview)
    # create a LoadDelegate
    loaddelegate = WebkitLoad.alloc().init()
    loaddelegate.options = [""]
    loaddelegate.urls = [url]
    webview.setFrameLoadDelegate_(loaddelegate)
    app.run()


if __name__ == '__main__':
    main()