This is sample code for scraping items from the web. Some websites do not allow direct requests from scripts; in that case, you need to add headers so the request looks like it comes from a browser.
"""Scrape city names from a sample web page into a list and print it.

The target site runs mod_security, which blocks bare script requests, so a
browser-like User-Agent header is sent to make the request look like a
normal browser query.
"""
from bs4 import BeautifulSoup as bs
import requests

# Browser-like headers: without them the site's mod_security rejects the request.
resp = requests.get(
    "https://pythonhow.com/example.html",
    headers={'User-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'},
)

# Parse the returned HTML.
soup = bs(resp.content, "html.parser")

# Each city lives in a <div class="cities"> block; its name is the <h2> text.
# Comprehension replaces the original append loop (same result, idiomatic form).
collect_cities = [
    city.find("h2").text
    for city in soup.find_all("div", {"class": "cities"})
]

print(collect_cities)
To collect the cities and their descriptions in a list of dictionaries, use the code below.
"""Scrape city names and descriptions into a list of dictionaries and print it.

The target site runs mod_security, which blocks bare script requests, so a
browser-like User-Agent header is sent to make the request look like a
normal browser query.
"""
from bs4 import BeautifulSoup as bs
import requests
from pprint import pprint

# Browser-like headers: without them the site's mod_security rejects the request.
resp = requests.get(
    "https://pythonhow.com/example.html",
    headers={'User-agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'},
)

# Parse the returned HTML.
soup = bs(resp.content, "html.parser")

# One dict per <div class="cities">: name from the <h2>, description from the <p>.
# Comprehension replaces the original append loop (same result, idiomatic form).
cities = [
    {"city": city.find("h2").text, "description": city.find("p").text}
    for city in soup.find_all("div", {"class": "cities"})
]

# Pretty-print the list of dictionaries.
pprint(cities)
Result