from dryscrape.driver.webkit import Driver as DefaultDriver
from itertools import chain
try:
import urlparse
except ImportError:
import urllib
urlparse = urllib.parse
class Session(object):
    """A web scraping session based on a driver instance.

    Implements the proxy pattern: attribute lookups that cannot be resolved
    on the session itself are forwarded to the underlying driver.

    If no `driver` is specified, the session creates an instance of
    ``dryscrape.session.DefaultDriver`` (defaults to
    ``dryscrape.driver.webkit.Driver``).

    If `base_url` is present, URLs are completed against this URL base.
    If not, URLs are handed to the driver unchanged.
    """

    def __init__(self, driver=None, base_url=None):
        """Store the driver (or create a default one) and the URL base.

        :param driver: driver instance to wrap; any falsy value results in a
            new ``DefaultDriver()`` being created.
        :param base_url: optional base used by :meth:`complete_url`.
        """
        self.driver = driver or DefaultDriver()
        self.base_url = base_url

    # implement proxy pattern
    def __getattr__(self, attr):
        """Pass unresolved attribute lookups to the underlying driver.

        :raises AttributeError: if the driver does not have `attr`, or if the
            session has no ``driver`` attribute yet.
        """
        # __getattr__ only runs after normal lookup has failed. If ``driver``
        # itself is what is missing (instance built via __new__ without
        # __init__, as copy.copy() does), reading self.driver below would
        # re-enter this method until RecursionError. Fail cleanly instead.
        if attr == "driver":
            raise AttributeError(attr)
        return getattr(self.driver, attr)

    def __dir__(self):
        """Allow for `dir` to detect proxied methods from `Driver`.

        :returns: list of unique names from the session class, the session's
            own instance attributes, and the driver.
        """
        names = set(dir(type(self)))
        # Instance attributes (``driver``, ``base_url``) are not part of
        # dir(type(self)), so add them explicitly.
        names.update(self.__dict__)
        names.update(dir(self.driver))
        return list(names)

    def visit(self, url):
        """Pass the URL through to the driver after completing it.

        :param url: URL to visit; completed with :meth:`complete_url` first.
        :returns: whatever the driver's ``visit`` returns.
        """
        return self.driver.visit(self.complete_url(url))

    def complete_url(self, url):
        """Complete a given URL with this instance's URL base.

        :param url: URL to complete.
        :returns: ``urlparse.urljoin(self.base_url, url)`` when ``base_url``
            is truthy, otherwise `url` unchanged.
        """
        if not self.base_url:
            return url
        return urlparse.urljoin(self.base_url, url)

    def interact(self, **local):
        """Drop the user into an interactive Python session.

        The ``sess`` variable is set to the current session instance. If
        keyword arguments are supplied, these names will also be available
        within the session.
        """
        # Imported lazily: only needed when an interactive console is opened.
        import code

        code.interact(local=dict(sess=self, **local))