Now that we have complete implementations for each scraper, we will test their relative performance with this snippet. The imports in the code expect your directory structure to be similar to the book's repository, so please adjust as necessary:
import time
import re
from chp2.all_scrapers import re_scraper, bs_scraper,
lxml_scraper, lxml_xpath_scraper
from chp1.advanced_link_crawler import download
NUM_ITERATIONS = 1000 # number of times to test each scraper
html = download('http://example.webscraping.com/places/view/United-Kingdom-239')
scrapers = [
('Regular expressions', re_scraper),
('BeautifulSoup', bs_scraper),
('Lxml', lxml_scraper),
('Xpath', lxml_xpath_scraper)]
for name, scraper in scrapers:
# record start time of scrape
start = time.time()
for i in range(NUM_ITERATIONS):
if scraper == re_scraper:
re...