diff --git a/Week-2/Extracting Data With Regular Expressions.py b/Week-2/Extracting Data With Regular Expressions.py index a243dec..82755a6 100644 --- a/Week-2/Extracting Data With Regular Expressions.py +++ b/Week-2/Extracting Data With Regular Expressions.py @@ -1,26 +1,20 @@ ''' -In this assignment you will read through and parse a file with text and numbers. -You will extractall the numbers in the file and compute the sum of the numbers. +In this assignment you will read through and parse a file +with text and numbers. You will extract all the numbers +in the file and compute the sum of the numbers. ''' import re -fname = raw_input('Enter File name :') +try: + fileHandle = open('./regex_sum_706599.txt') +except: + print("Error when opening file") -handle = open(fname) +text = fileHandle.read() +listNumbers = re.findall('[0-9]+', text) -sum=0 +sum = 0 +for strNumber in listNumbers: + sum = sum + int(strNumber) -count = 0 - -for line in handle: - - f = re.findall('[0-9]+',line) - - for num in f: - - if num >= [0]: - - count = count + 1 - sum = sum + int(num) - -print 'There are',count,'values with a sum =',sum +print(sum) diff --git a/Week-4/Following Links in HTML Using BeautifulSoup.py b/Week-4/Following Links in HTML Using BeautifulSoup.py index d784bc3..9b62d3a 100644 --- a/Week-4/Following Links in HTML Using BeautifulSoup.py +++ b/Week-4/Following Links in HTML Using BeautifulSoup.py @@ -5,24 +5,34 @@ We provide two files for this assignment. One is a sample file where we give you the name for your testing and the other is the actual data you need to process for the assignment -Sample problem: Start at http://python-data.dr-chuck.net/known_by_Fikret.html +Sample problem: Start at http://python-data.dr-chuck.net/known_by_Fikret.html Find the link at position 3 (the first name is 1). Follow that link. Repeat this process 4 times. The answer is the last name that you retrieve. 
-Sequence of names: Fikret Montgomery Mhairade Butchi Anayah +Sequence of names: Fikret Montgomery Mhairade Butchi Anayah Last name in sequence: Anayah -Actual problem: Start at: http://python-data.dr-chuck.net/known_by_Blanka.html +Actual problem: Start at: http://python-data.dr-chuck.net/known_by_Blanka.html Find the link at position 18 (the first name is 1). Follow that link. Repeat this process 7 times. The answer is the last name that you retrieve. Hint: The first character of the name of the last page that you will load is: L ''' -import urllib +import urllib.request +import urllib.parse +import urllib.error from bs4 import BeautifulSoup -url = raw_input('Enter Url: ') -count = int(raw_input("Enter count: ")) -position = int(raw_input("Enter position:")) -for i in range(count): - html = urllib.urlopen(url).read() - soup = BeautifulSoup(html) +import ssl + +# Ignore SSL certificate errors +ctx = ssl.create_default_context() +ctx.check_hostname = False +ctx.verify_mode = ssl.CERT_NONE +url = input('Enter URL: ') +count = int(input('Enter count: ')) +position = int(input('Enter position: ')) + +for i in range(count): + html = urllib.request.urlopen(url, context=ctx).read() + soup = BeautifulSoup(html, 'html.parser') + # Retrieve all of the anchor tags tags = soup('a') s = [] t = [] @@ -31,8 +41,7 @@ s.append(x) y = tag.text t.append(y) - - print s[position-1] - print t[position-1] - url = s[position-1] + print(s[position - 1]) + print(t[position - 1]) + url = s[position - 1] diff --git a/Week-4/Scraping HTML Data with BeautifulSoup.py b/Week-4/Scraping HTML Data with BeautifulSoup.py index f9670a3..126d61d 100644 --- a/Week-4/Scraping HTML Data with BeautifulSoup.py +++ b/Week-4/Scraping HTML Data with BeautifulSoup.py @@ -1,34 +1,31 @@ ''' -Scraping Numbers from HTML using BeautifulSoup -In this assignment you will write a Python program -similar to http://www.pythonlearn.com/code/urllink2.py. 
-The program will use urllib to read the HTML from the data files below, -and parse the data, extracting numbers and compute the -sum of the numbers in the file. +In this assignment you will write a Python program to use urllib +to read the HTML from the data files below, +and parse the data, extracting numbers +then compute the sum of the numbers in the file -We provide two files for this assignment. -One is a sample file where we give you the sum for your testing and -the other is the actual data you need to process for the assignment. - -Sample data: http://python-data.dr-chuck.net/comments_42.html (Sum=2553) -Actual data: http://python-data.dr-chuck.net/comments_353539.html (Sum ends with 63) -You do not need to save these files to your folder since your program -will read the data directly from the URL. Note: Each student will have a -distinct data url for the assignment - so only use your own data url for analysis. +if you haven't installed BeautifulSoup4 before +please run: pip3 install bs4 ''' -import urllib +import urllib.request +import urllib.parse +import urllib.error from bs4 import BeautifulSoup -url = raw_input('Enter - ') +url = input("Enter - ") +html = urllib.request.urlopen(url).read() +soup = BeautifulSoup(html, 'html.parser') + +# Retrieve all of the anchor tags +tags = soup("span") + +count = 0 +sum = 0 + +for tag in tags: + temp = int(tag.text) + count += 1 + sum += temp -html = urllib.urlopen(url).read() -soup = BeautifulSoup(html) -tag = soup("span") -count=0 -sum=0 -for i in tag: - x=int(i.text) - count+=1 - sum = sum + x -print count -print sum +print("Count", count) +print("Sum", sum) diff --git a/Week-5/Extracting Data from XML.py b/Week-5/Extracting Data from XML.py index 0d1fffa..043d213 100644 --- a/Week-5/Extracting Data from XML.py +++ b/Week-5/Extracting Data from XML.py @@ -7,21 +7,26 @@ Actual data: http://python-data.dr-chuck.net/comments_353536.xml (Sum ends with 90) You do not need to save these files to your folder since your 
program will read the data directly from the URL. Note: Each student will have a distinct data url for the assignment - so only use your own data url for analysis. ''' -import urllib +import urllib.request +import urllib.parse +import urllib.error import xml.etree.ElementTree as ET +import ssl -url = raw_input("Enter - ") -uh = urllib.urlopen(url) -data = uh.read() +url = input("Enter location: ") +urlHandle = urllib.request.urlopen(url) +data = urlHandle.read() tree = ET.fromstring(data) -results = tree.findall('comments/comment') -count =0 -sum=0 -for item in results: - x = int(item.find('count').text) - count =count+1 - sum = sum+x +pData = tree.findall('comments/comment') -print "Count : ",count -print "Sum : ",sum +count = 0 +sum = 0 + +for item in pData: + temp = int(item.find('count').text) # get the data in count element + count = count + 1 + sum = sum + temp + +print("Count", count) +print("Sum", sum) diff --git a/Week-6/Extracting Data from JSON.py b/Week-6/Extracting Data from JSON.py index 340861d..5edd5d5 100644 --- a/Week-6/Extracting Data from JSON.py +++ b/Week-6/Extracting Data from JSON.py @@ -1,27 +1,31 @@ ''' -In this assignment you will write a Python program somewhat similar to http://www.pythonlearn.com/code/json2.py. The program will prompt for a URL, read the JSON data from that URL using urllib and then parse and extract the comment counts from the JSON data, compute the sum of the numbers in the file and enter the sum below: -We provide two files for this assignment. One is a sample file where we give you the sum for your testing and the other is the actual data you need to process for the assignment. - -Sample data: http://python-data.dr-chuck.net/comments_42.json (Sum=2553) -Actual data: http://python-data.dr-chuck.net/comments_353540.json (Sum ends with 71) -You do not need to save these files to your folder since your program will read the data directly from the URL. 
Note: Each student will have a distinct data url for the assignment - so only use your own data url for analysis. +The program will prompt for a URL, read the JSON data +from that URL using urllib and then parse and +extract the comment counts from the JSON data, +compute the sum of the numbers in the file +and enter the sum below: +- Actual data: http://py4e-data.dr-chuck.net/comments_706604.json +- Dependencies: pip3 install bs4 ''' import json -import urllib +import urllib.request +import urllib.parse +import urllib.error count = 0 sum = 0 -url = raw_input("Enter Url - ") - -data = urllib.urlopen(url).read() -print data +url = input("Enter location: ") +print("Retrieving", url) +urlHandle = urllib.request.urlopen(url) +data = urlHandle.read() +print("Retrieved", len(data), "characters") -info = json.loads(str(data)) +info = json.loads(data) for i in info['comments']: count = count+1 sum = sum + i['count'] -print "Sum : ",sum -print "count : ",count +print("Count:", count) +print("Sum:", sum) diff --git a/Week-6/Using the GeoJSON API.py b/Week-6/Using the GeoJSON API.py index 09644ab..f444f8f 100644 --- a/Week-6/Using the GeoJSON API.py +++ b/Week-6/Using the GeoJSON API.py @@ -1,41 +1,42 @@ -''' -Calling a JSON API - -In this assignment you will write a Python program somewhat similar to http://www.pythonlearn.com/code/geojson.py. The program will prompt for a location, contact a web service and retrieve JSON for the web service and parse that data, and retrieve the first place_id from the JSON. A place ID is a textual identifier that uniquely identifies a place as within Google Maps. -API End Points - -To complete this assignment, you should use this API endpoint that has a static subset of the Google Data: - -http://python-data.dr-chuck.net/geojson -This API uses the same parameters (sensor and address) as the Google API. This API also has no rate limit so you can test as often as you like. 
If you visit the URL with no parameters, you get a list of all of the address values which can be used with this API. -To call the API, you need to provide a sensor=false parameter and the address that you are requesting as the address= parameter that is properly URL encoded using the urllib.urlencode() fuction as shown in http://www.pythonlearn.com/code/geojson.py -''' -import urllib import json - -serviceurl = "http://python-data.dr-chuck.net/geojson?" - -while True: - - address = raw_input("Enter location: ") - - if len(address) < 1 : break - - url = serviceurl + urllib.urlencode({'sensor':'false','address':address}) - - print 'Retrieving',url - - uh =urllib.urlopen(url) - data = uh.read() - print 'Retrived',len(data),'characters' - - try: js = json.loads(str(data)) - except: js = None - if 'status' not in js or js['status'] != 'OK': - print '==== Failure To Retrieve ====' - print data - continue - - placeid = js["results"][0]['place_id'] - print "Place id",placeid - +import ssl +import urllib.request +import urllib.parse +import urllib.error + +# Ignore SSL certificate errors +ctx = ssl.create_default_context() +ctx.check_hostname = False +ctx.verify_mode = ssl.CERT_NONE + +# Storing the given parameters +api_key = 42 +serviceurl = "http://py4e-data.dr-chuck.net/json?" 
+data_address = input("Enter location: ") + +params = {"address": data_address, "key": api_key} +paramsurl = urllib.parse.urlencode(params) + +url = serviceurl.strip() + paramsurl.strip() +print("Retrieving:", url) + +# Obtaining and reading the data +try: + data_read = urllib.request.urlopen(url, context=ctx).read() + data = data_read.decode() + print("Retrieved", len(data), "characters") + + # Parsing the data and looking for location info + jsondata = json.loads(data) + + if 'status' not in jsondata or jsondata['status'] != 'OK': + print("Error: Failure to retrieve") + print(data) + + # Set and print out location info to the console + place_id = jsondata["results"][0]["place_id"] + print("Place id", place_id) +except: + print("Error. Please try again.") + print("-"*30) + print(data)