In this tutorial, we will download and analyze earthquake data using the USGS Earthquake API.
Downloading Earthquake Dataset
"""
Downloading and Parsing Earthquake JSON Data
"""
import urllib.request, urllib.parse, urllib.error
import json
# Build the USGS event query. paramD is kept as a module-level dict so that
# later steps can change individual parameters (e.g. "endtime") and re-query.
earthquakeURL = "http://earthquake.usgs.gov/fdsnws/event/1/query?"
paramD = dict()
paramD["format"] = "geojson" # the format the data will be in
paramD["starttime"] = "2019-06-01T00:00:00" # the minimum date/time that might be retrieved
paramD["endtime"] = "2019-06-30T23:59:59" # the maximum date/time that might be retrieved
paramD["minmag"] = 6 # the smallest earthquake magnitude to return
paramD["limit"] = 5 # the maximum number of earthquakes to return
# starts with the most recent
params = urllib.parse.urlencode(paramD)
print('Retrieving', earthquakeURL+params)
# Use a context manager so the HTTP response is closed once the body is read.
with urllib.request.urlopen(earthquakeURL+params) as uh:
    data = uh.read().decode()
print('Retrieved', len(data), 'characters')
# Only a JSON parsing failure means "bad payload"; any other exception
# should surface instead of being silently swallowed.
try:
    js = json.loads(data)
except json.JSONDecodeError:
    js = None

count = 0
if not isinstance(js, dict) or 'type' not in js:
    # Nothing usable came back: show the raw payload and do not try to
    # index into it.
    print('==== Failure To Retrieve ====')
    print(data)
else:
    features = js.get("features") or []

    # Output first Record (skipped when the query matched no earthquakes)
    if features:
        print("\nFirst Earthquake")
        # coordinates are read as [longitude, latitude, depth]
        lng, lat, dep = features[0]["geometry"]["coordinates"][:3]
        print('lng', lng, 'lat', lat, 'depth', dep)
        # look in the properties object and return the place description
        location = features[0]["properties"]["place"]
        print(location, "\n")

    # Loop through entire data set
    print("\nAll Earthquakes")
    for f in features:
        lng, lat, dep = f["geometry"]["coordinates"][:3]
        print('lng', lng, 'lat', lat, 'depth', dep)
        location = f["properties"]["place"]
        print(location, "\n")
        count = count+1
    print(count)
Output:
Retrieving http://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2019-06-01T00%3A00%3A00&endtime=2019-06-30T23%3A59%3A59&minmag=6&limit=5
Retrieved 4819 characters
First Earthquake
lng 144.3477 lat 19.8515 depth 410
95km SW of Farallon de Pajaros, Northern Mariana Islands
All Earthquakes
lng 144.3477 lat 19.8515 depth 410
95km SW of Farallon de Pajaros, Northern Mariana Islands
lng -179.182 lat -30.3961 depth 10
118km NNW of L'Esperance Rock, New Zealand
lng -82.754 lat 8.4612 depth 32.58
5km ESE of Aserrio de Gariche, Panama
lng 164.0881 lat 56.1821 depth 10
100km E of Ust'-Kamchatsk Staryy, Russia
lng 164.2327 lat 56.202 depth 10
108km E of Ust'-Kamchatsk Staryy, Russia
Output: 5
Load data set
# Parse the downloaded JSON text (the `data` string) into Python objects
import json
js = json.loads(data)
# Evaluate js on its own line to display the parsed result (see the output shown below)
js
The output looks like this:
{'type': 'FeatureCollection', 'metadata': {'generated': 1585358568000, 'url': 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2019-06-01T00%3A00%3A00&endtime=2019-06-30T23%3A59%3A59&minmag=6&limit=5', 'title': 'USGS Earthquakes', 'status': 200, 'api': '1.8.1', 'limit': 5, 'offset': 1, 'count': 5},
............
Step 1: Download your first Earthquakes
# USGS Earthquakes
# Rather than printing each individual earthquake, pull out the "metadata"
# object, which describes the set of earthquakes that was returned
# (including its "count").
meta_data = js["metadata"]
meta_data
Output:
{'generated': 1585358568000, 'url': 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2019-06-01T00%3A00%3A00&endtime=2019-06-30T23%3A59%3A59&minmag=6&limit=5', 'title': 'USGS Earthquakes', 'status': 200, 'api': '1.8.1', 'limit': 5, 'offset': 1, 'count': 5}
# Number of earthquakes returned by the query, as reported in the metadata
earth_count = meta_data["count"]
print(earth_count)
5
# Extract the whole "features" array from the returned JSON data;
# this is the array of earthquakes.
features_array = js["features"]
features_array
Output:
[{'type': 'Feature', 'properties': {'mag': 6.4, 'place': '95km SW of Farallon de Pajaros, Northern Mariana Islands', 'time': 1561737091302, 'updated': 1584720093031, 'tz': 600, 'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/us700046la', 'detail': 'https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=us700046la&format=geojson', 'felt': None, 'cdi': None, 'mmi': 2.961, 'alert': 'green', 'status': 'reviewed', 'tsunami': 1, 'sig': 630, 'net': 'us', 'code': '700046la', 'ids': ',us700046la,pt19179000,at00pttg1w,', 'sources': ',us,pt,at,', 'types': ',geoserve,ground-failure,impact-link,losspager,moment-tensor,oaf,origin,phase-data,shakemap,', 'nst': None, 'dmin': 4.787, 'rms': 1.16, 'gap': 26, 'magType': 'mww', 'type': 'earthquake', 'title': 'M 6.4 - 95km SW of Farallon de Pajaros, Northern Mariana Islands'}, 'geometry': {'type': 'Point', 'coordinates': [144.3477, 19.8515, 410]}, 'id': 'us700046la'},
....
....
}]
# Get the last earthquake in the features array (a plain list in Python),
# using the count variable to compute its index.
last_index = earth_count - 1
last_earth_quake_features_type = features_array[last_index]
last_earth_quake_features_type
Output:
{'type': 'Feature', 'properties': {'mag': 6.4, 'place': "108km E of Ust'-Kamchatsk Staryy, Russia", 'time': 1561453540478, 'updated': 1585316522948, 'tz': 660, 'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/us600045mz', 'detail': 'https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=us600045mz&format=geojson', 'felt': 1, 'cdi': 2.7, 'mmi': 5.253, 'alert': 'green', 'status': 'reviewed', 'tsunami': 1, 'sig': 630, 'net': 'us', 'code': '600045mz', 'ids': ',us600045mz,pt19176001,at00ptn9l9,', 'sources': ',us,pt,at,', 'types': ',dyfi,geoserve,ground-failure,impact-link,losspager,moment-tensor,origin,phase-data,shakemap,', 'nst': None, 'dmin': 4.546, 'rms': 0.57, 'gap': 55, 'magType': 'mww', 'type': 'earthquake', 'title': "M 6.4 - 108km E of Ust'-Kamchatsk Staryy, Russia"}, 'geometry': {'type': 'Point', 'coordinates': [164.2327, 56.202, 10]}, 'id': 'us600045mz'}
Step 2: Find the time of the Last Earthquake you Downloaded
# Step 2: Find the time of the last earthquake you downloaded
# The event time is stored under "properties" -> "time".
last_properties = last_earth_quake_features_type["properties"]
find_last_earth_quake_time = last_properties["time"]
find_last_earth_quake_time
1561453540478
import datetime
# The event time is in milliseconds since the Unix epoch, so divide by 1000.
# Convert it in UTC: fromtimestamp() without tz uses the machine's local
# timezone, while the USGS API assumes UTC when no timezone is given, so a
# local-time endtime would be off by the UTC offset and re-fetch earthquakes.
# tzinfo is dropped afterwards so the ISO string keeps the same
# "YYYY-MM-DDTHH:MM:SS" shape as the original starttime/endtime values.
dt3 = datetime.datetime.fromtimestamp(
    find_last_earth_quake_time / 1000, tz=datetime.timezone.utc
).replace(tzinfo=None)
dt3
datetime.datetime(2019, 6, 25, 9, 5, 40, 478000)
import pandas
result=pandas.to_datetime(dt3)
str(result)
'2019-06-25 09:05:40.478000'
#time in ISO format (fractional seconds dropped)
endtime = result.replace(microsecond=0).isoformat()
endtime
'2019-06-25T09:05:40'
# Use the time of the last downloaded earthquake as the new upper bound
paramD["endtime"] = endtime
paramD["endtime"]
'2019-06-25T09:05:40'
Step 3: Add a Loop to Download Different Sets of Earthquakes
#Step 3: Add a Loop to Download Different Sets of Earthquakes
import time
print("looping...")
# Loop through entire data set
print("\nAll Earthquakes")
# While loop
count = 0
while count < earth_count:
i = 0
f = features_array[0]
lng = f["geometry"]["coordinates"][0]
lat = f["geometry"]["coordinates"][1]
dep = f["geometry"]["coordinates"][2]
print('lng', lng, 'lat', lat, 'depth', dep)
location = f["properties"]["place"]
print(location, "\n")
time.sleep(2)
count = count+1
Output:
looping... All Earthquakes lng 144.3477 lat 19.8515 depth 410 95km SW of Farallon de Pajaros, Northern Mariana Islands lng 144.3477 lat 19.8515 depth 410 95km SW of Farallon de Pajaros, Northern Mariana Islands lng 144.3477 lat 19.8515 depth 410 95km SW of Farallon de Pajaros, Northern Mariana Islands lng 144.3477 lat 19.8515 depth 410 95km SW of Farallon de Pajaros, Northern Mariana Islands lng 144.3477 lat 19.8515 depth 410 95km SW of Farallon de Pajaros, Northern Mariana Islands
Step 4: Write Data to MongoDB
#Do it yourself
#!pip3 install pymongo
import json
from pymongo import MongoClient
from pymongo.errors import PyMongoError
client = MongoClient('localhost', 27017)
#Before the start of your loop you will want to create a database to hold your earthquake data (e.g. earthquakedb), then create a collection to hold your earthquakes (eg. earthquakes).
db = client['earthquakedb']
collection_earthquakes = db['earthquakes']
#Inside the loop, you will want to ADD the features array you get from the JSON data to the MongoDB earthquakes collection. Remember features contains multiple earthquakes so you will want to use the command that allows you to add multiple items to a collection.
# Each element of features_array is already one complete earthquake document
# (a dict), so the whole list is stored with a single insert_many call on the
# `collection_earthquakes` handle (i.e. the "earthquakes" collection).
# insert_many rejects an empty list, hence the guard.
if features_array:
    try:
        collection_earthquakes.insert_many(features_array)
    except PyMongoError as err:
        # Report the failure instead of hiding it; the download results
        # remain available in memory.
        print('==== Failure To Store Earthquakes ====')
        print(err)
#print one record
data1 = collection_earthquakes.find_one()
data1
Step 5: Error Checking
Before the start of the loop, you will want to check whether any earthquake data has already been downloaded and, if it has, reset the endtime (earthquakes are downloaded most recent first, so if data has already been downloaded, it is the endtime that will change). This way you can stop and restart the download without getting duplicate data in the database.
Contact us for solutions to this machine learning assignment. Codersarts specialists can mentor and guide you through machine learning assignments like this one.
If you have project or assignment files, you can send them directly to contact@codersarts.com.