Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 13 additions & 19 deletions Week-2/Extracting Data With Regular Expressions.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,20 @@
'''
In this assignment you will read through and parse a file
with text and numbers. You will extract all the numbers
in the file and compute the sum of the numbers.
'''
import re

# Read the whole assignment data file up front; exit early with a message
# if it cannot be opened (the original printed an error but then kept going
# and crashed on an unbound file handle).
try:
    with open('./regex_sum_706599.txt') as file_handle:
        text = file_handle.read()
except OSError:
    raise SystemExit("Error when opening file")

# Every maximal run of digits in the file, as strings.
numbers = re.findall(r'[0-9]+', text)

# Sum the extracted numbers (renamed from `sum` to avoid shadowing the builtin).
total = sum(int(number) for number in numbers)

print(total)
37 changes: 23 additions & 14 deletions Week-4/Following Links in HTML Using BeautifulSoup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,34 @@

We provide two files for this assignment. One is a sample file where we give you the name for your testing and the other is the actual data you need to process for the assignment

Sample problem: Start at http://python-data.dr-chuck.net/known_by_Fikret.html
Find the link at position 3 (the first name is 1). Follow that link. Repeat this process 4 times. The answer is the last name that you retrieve.
Sequence of names: Fikret Montgomery Mhairade Butchi Anayah
Last name in sequence: Anayah
Actual problem: Start at: http://python-data.dr-chuck.net/known_by_Blanka.html
Find the link at position 18 (the first name is 1). Follow that link. Repeat this process 7 times. The answer is the last name that you retrieve.
Hint: The first character of the name of the last page that you will load is: L
'''

import urllib.request
import urllib.parse
import urllib.error
from bs4 import BeautifulSoup
import ssl

# Ignore SSL certificate errors so the course URLs load without a local
# certificate store.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = input('Enter URL: ')
count = int(input('Enter count: '))
position = int(input('Enter position: '))

# Follow the link at `position` (1-based) `count` times, printing the
# href and link text chosen at each hop.
for _ in range(count):
    html = urllib.request.urlopen(url, context=ctx).read()
    soup = BeautifulSoup(html, 'html.parser')

    # Retrieve all of the anchor tags on the current page.
    tags = soup('a')
    hrefs = []
    names = []
    for tag in tags:
        hrefs.append(tag.get('href', None))
        names.append(tag.text)

    print(hrefs[position - 1])
    print(names[position - 1])
    # The chosen href becomes the page fetched on the next iteration.
    url = hrefs[position - 1]
53 changes: 25 additions & 28 deletions Week-4/Scraping HTML Data with BeautifulSoup.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,31 @@
'''
Scraping Numbers from HTML using BeautifulSoup

In this assignment you will write a Python program to use urllib
to read the HTML from the data files below,
and parse the data, extracting numbers
then compute the sum of the numbers in the file.

We provide two files for this assignment.
One is a sample file where we give you the sum for your testing and
the other is the actual data you need to process for the assignment.

Sample data: http://python-data.dr-chuck.net/comments_42.html (Sum=2553)
Actual data: http://python-data.dr-chuck.net/comments_353539.html (Sum ends with 63)

You do not need to save these files to your folder since your program
will read the data directly from the URL. Note: Each student will have a
distinct data url for the assignment - so only use your own data url for analysis.

If you haven't installed BeautifulSoup4 before,
please run: pip3 install bs4
'''
import urllib.request
import urllib.parse
import urllib.error
from bs4 import BeautifulSoup

url = input("Enter - ")
html = urllib.request.urlopen(url).read()
soup = BeautifulSoup(html, 'html.parser')

# Every <span> element holds one of the numbers to be summed.
tags = soup("span")

count = 0
total = 0  # renamed from `sum` to avoid shadowing the builtin

for tag in tags:
    total += int(tag.text)
    count += 1

print("Count", count)
print("Sum", total)
31 changes: 18 additions & 13 deletions Week-5/Extracting Data from XML.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,26 @@
Actual data: http://python-data.dr-chuck.net/comments_353536.xml (Sum ends with 90)
You do not need to save these files to your folder since your program will read the data directly from the URL. Note: Each student will have a distinct data url for the assignment - so only use your own data url for analysis.
'''
import urllib.request
import urllib.parse
import urllib.error
import xml.etree.ElementTree as ET
import ssl

url = input("Enter location: ")
url_handle = urllib.request.urlopen(url)
data = url_handle.read()

# Parse the XML document and collect every <comment> under <comments>.
tree = ET.fromstring(data)
comments = tree.findall('comments/comment')

count = 0
total = 0  # renamed from `sum` to avoid shadowing the builtin

for item in comments:
    # get the data in the <count> element of each comment
    total += int(item.find('count').text)
    count += 1

print("Count", count)
print("Sum", total)
32 changes: 18 additions & 14 deletions Week-6/Extracting Data from JSON.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
'''
In this assignment you will write a Python program somewhat similar to
http://www.pythonlearn.com/code/json2.py.

The program will prompt for a URL, read the JSON data
from that URL using urllib and then parse and
extract the comment counts from the JSON data,
compute the sum of the numbers in the file
and enter the sum below:
- Actual data: http://py4e-data.dr-chuck.net/comments_706604.json
- Dependencies: pip3 install bs4
'''
import json
import urllib.request
import urllib.parse
import urllib.error

count = 0
total = 0  # renamed from `sum` to avoid shadowing the builtin

url = input("Enter location: ")
print("Retrieving", url)
url_handle = urllib.request.urlopen(url)
data = url_handle.read()
print("Retrieved", len(data), "characters")

# json.loads accepts bytes directly (no need to str()-wrap the payload).
info = json.loads(data)

for comment in info['comments']:
    count += 1
    total += comment['count']

print("Count:", count)
print("Sum:", total)
81 changes: 41 additions & 40 deletions Week-6/Using the GeoJSON API.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
'''
Calling a JSON API

In this assignment you will write a Python program somewhat similar to
http://www.pythonlearn.com/code/geojson.py. The program will prompt for a
location, contact a web service, retrieve JSON from the web service,
parse that data, and retrieve the first place_id from the JSON. A place ID
is a textual identifier that uniquely identifies a place within Google Maps.

API endpoint (static subset of the Google data, no rate limit):
    http://py4e-data.dr-chuck.net/json?

The address you are requesting is passed as the properly URL-encoded
`address=` parameter, built with urllib.parse.urlencode().
'''
import json
import ssl
import urllib.request
import urllib.parse
import urllib.error

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Storing the given parameters
api_key = 42
serviceurl = "http://py4e-data.dr-chuck.net/json?"
data_address = input("Enter location: ")

params = {"address": data_address, "key": api_key}
paramsurl = urllib.parse.urlencode(params)

url = serviceurl.strip() + paramsurl.strip()
print("Retrieving:", url)

# Obtaining and reading the data. Catch only the network error here: the
# original bare `except` referenced `data` in its handler, which is unbound
# when urlopen itself fails, raising NameError instead of reporting the issue.
try:
    data = urllib.request.urlopen(url, context=ctx).read().decode()
except urllib.error.URLError:
    raise SystemExit("Error. Please try again.")

print("Retrieved", len(data), "characters")

# Parsing the data and looking for location info. A non-JSON reply is
# reported with the raw payload for debugging.
try:
    jsondata = json.loads(data)
except json.JSONDecodeError:
    print("Error: Failure to retrieve")
    print("-" * 30)
    raise SystemExit(data)

if 'status' not in jsondata or jsondata['status'] != 'OK':
    # The original fell through after this message and still indexed
    # results[0], crashing on failure responses; stop here instead.
    print("Error: Failure to retrieve")
    print(data)
else:
    # Set and print out location info to the console
    place_id = jsondata["results"][0]["place_id"]
    print("Place id", place_id)