Created by Ivan Lima on Thu Jan 23 2020 20:30:50 -0500
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime, os
import cartopy.crs as ccrs
import cartopy.feature as cfeature
# Allow up to 50 columns when pandas renders a DataFrame.
pd.set_option('display.max_columns', 50)
# Record when this notebook was last executed.
run_timestamp = datetime.datetime.now().ctime()
print('Last updated on {}'.format(run_timestamp))
Last updated on Fri Jan 24 12:00:49 2020
# Load the raw log-entry export and coerce the DateTime, ID, Page and Depth
# columns to proper types. Several steps patch individual rows by label, so
# they are tied to this specific export file.

# Strings that are treated as missing data when the CSV is read
# (passed to read_csv through its na_values argument).
na_values = ['No observation', 'No observations', 'No Observation', 'No Observations',
'no observation', 'no observations', 'None given', 'none given', 'None Given', 'none',
'none recorded', 'not recorded', 'None recorded', 'Not given', 'not given', ' ']
# Read the CSV file, asking pandas to combine the columns at positions 3 and 4
# into a single 'DateTime' column.
# NOTE(review): the dict form of parse_dates appears to be deprecated in recent
# pandas releases -- confirm against the installed version.
df = pd.read_csv('data/log-entries-export-with-text-blocks-2020-01-19.csv', parse_dates={'DateTime':[3,4]},
na_values=na_values)
# The .str accessor below requires DateTime to still hold strings here.
# Entries ending in ' nan' are blanked; presumably these are rows whose second
# (time) component was missing -- TODO confirm against the raw file.
df.loc[df.DateTime.str.endswith(' nan'), 'DateTime'] = np.nan
# Hard-coded correction for the row labelled 1855.
df.loc[1855,'DateTime'] = '1868-05-13 12:00:00'
# Parse the cleaned strings with an explicit format into a new column, then
# discard the intermediate string column.
df['Entry Date Time'] = pd.to_datetime(df.DateTime, format='%Y-%m-%d %H:%M:%S')
df = df.drop('DateTime', axis=1)
# Remove a leading byte-order mark (U+FEFF) from ID before casting to int32.
df['ID'] = df.ID.str.lstrip('\ufeff')
df['ID'] = df.ID.astype(np.int32)
# Page: 'N' becomes missing, the range '30-31' is recorded as 30, and the
# column is then converted to numeric.
df.loc[df.Page=='N','Page'] = np.nan
df.loc[df.Page=='30-31','Page'] = 30
df['Page'] = pd.to_numeric(df.Page)
# Hard-coded Depth corrections (.loc slicing is inclusive: rows 1313 and 1314).
df.loc[1313:1314,'Depth'] = '12.5'
df.loc[2799,'Depth'] = '20'
# NOTE: rstrip('Fathoms') removes any trailing run of the characters
# F, a, t, h, o, m, s -- it is a character set, not the literal suffix.
df['Depth'] = df.Depth.str.rstrip('Fathoms')
df['Depth'] = pd.to_numeric(df.Depth)
# Blank out entries that still begin with a "missing" marker after the CSV
# na_values filter (the markers are checked one after another, on the
# whitespace-stripped text).
for column in ['Ship Heading/Course', 'Wind Direction', 'Wind Speed/Force']:
    for marker in ['none', 'nnone', 'not recorded']:
        is_marker = df[column].str.strip().str.startswith(marker).fillna(False)
        df.loc[is_marker, column] = np.nan
# Clean/standardize Wind Direction
# Remove leading whitespace and a leading "from"/"From" word.
# The previous str.lstrip('from ') / str.lstrip('From ') calls stripped any
# leading run of the characters f, r, o, m, F and space (lstrip takes a
# character set, not a prefix), which could eat the start of unrelated words.
df['Wind Direction'] = (df['Wind Direction'].str.lstrip()
                        .str.replace(r'^[Ff]rom\b\s*', '', regex=True))
df['Wind Direction'] = df['Wind Direction'].str.replace('Calm', 'calm', regex=False)
df['Wind Direction'] = df['Wind Direction'].str.replace('Off shore', 'off shore', regex=False)
df.loc[df['Wind Direction'] == 'e', 'Wind Direction'] = 'E'
# Entries that are not wind directions are set to missing.
# NOTE(review): 'ine winds' looks like 'fine winds' after the old character
# stripping; both spellings are listed so the result is the same whether or
# not the prefix fix above is in effect -- confirm against the raw data.
for st in ['ine winds', 'fine winds', 'light wind', 'Light airs', 'direction']:
    df.loc[df['Wind Direction'] == st, 'Wind Direction'] = np.nan
# Map the different wordings for an undefined direction to 'variable'.
for st in ['"All round the House"', 'All around', 'All directions', 'Changeable', 'Variable']:
    df['Wind Direction'] = df['Wind Direction'].str.replace(st, 'variable', regex=False)
# Abbreviate spelled-out compass words. Longer forms come first so that
# e.g. 'Easterly' is not reduced to 'Eerly' by the shorter 'East' rule.
for st in ['Easterly', 'Eastward', 'East']:
    df['Wind Direction'] = df['Wind Direction'].str.replace(st, 'E', regex=False)
for st in ['Westerly', 'Westward', 'West']:
    df['Wind Direction'] = df['Wind Direction'].str.replace(st, 'W', regex=False)
for st in ['Southward', 'South']:
    df['Wind Direction'] = df['Wind Direction'].str.replace(st, 'S', regex=False)
# Clean/standardize Wind Speed/Force
df['Wind Speed/Force'] = df['Wind Speed/Force'].str.lower()
# These exact entries look like directions entered in the wrong field
# (assumption); they are set to missing.
for st in ['from ne', 'sw']:
    df.loc[df['Wind Speed/Force'] == st, 'Wind Speed/Force'] = np.nan
# Fix misspellings. All patterns are literal text, so regex=False is passed
# explicitly to make the behaviour independent of the pandas default.
for word in ['widns', 'windq', '"wind"']:
    df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace(word, 'winds', regex=False)
for word in ['breezs', 'breeeze']:
    df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace(word, 'breezes', regex=False)
# The 'breeeze' rule above turns 'breeezed' into 'breezesd'; undo that here.
df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace('breezesd', 'breezed', regex=False)
df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace('fne', 'fine', regex=False)
df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace('string', 'strong', regex=False)
df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace('aires', 'airs', regex=False)
df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace('light light winds', 'light winds', regex=False)
df.loc[df['Wind Speed/Force'] == 'light bafflin', 'Wind Speed/Force'] = 'light baffling'
# Order matters: 'bafling' must be fixed before the shorter 'baflin'.
for st in ['"baffling"', 'blaffling', 'bafling', 'baflin']:
    df['Wind Speed/Force'] = df['Wind Speed/Force'].str.replace(st, 'baffling', regex=False)
# Convert a trailing singular 'wind'/'breeze' to plural.
# Only the final word is pluralized: the previous implementation replaced
# every occurrence of the word in a matching entry, which also corrupted
# earlier words (e.g. 'breezed up to brisk breeze' became
# 'breezesd up to brisk breezes').
for word in ['wind', 'breeze']:
    ends_with_word = df['Wind Speed/Force'].str.endswith(word).fillna(False)
    df.loc[ends_with_word, 'Wind Speed/Force'] = df.loc[ends_with_word, 'Wind Speed/Force'] + 's'
# Clean/standardize Sea State
# Shorten verbose descriptions to a canonical phrase.
for verbose, short in [('"A Big Swell going"', 'Big Swell'),
                       ('Big swell going', 'Big Swell'),
                       ('Rough, running under topsails', 'Rough')]:
    df['Sea State'] = df['Sea State'].str.replace(verbose, short)
# Entries that are exactly 'Rough Sea' / 'Heavy Sea' get the plural form.
for singular in ['Rough Sea', 'Heavy Sea']:
    df.loc[df['Sea State'] == singular, 'Sea State'] = singular + 's'
# Lowercase 'Calm'. The 'Calm' pass already rewrites the start of 'Calms',
# so the second pass finds nothing left to replace.
for label in ['Calm', 'Calms']:
    df['Sea State'] = df['Sea State'].str.replace(label, 'calm')
# Lowercase the descriptive vocabulary wherever it occurs. Replacing across
# the whole column leaves entries without the word unchanged, which gives the
# same result as first selecting the rows that contain it.
capitalized_words = ['Heavy', 'Swell', 'Bad', 'Very', 'Large', 'Rough', 'Rugged', 'Big', 'High',
                     'Water', 'Seas', 'Sea', 'Running', 'Running', 'Remarkably', 'Considerable',
                     'Heaving', 'Moderate', 'Pleasant', 'Smooth', 'Unsettled']
for term in capitalized_words:
    df['Sea State'] = df['Sea State'].str.replace(term, term.lower())
# Clean/standardize Cloud Cover: lowercase, then unify the spelling of 'smoky'.
cloud_cover = df['Cloud Cover'].str.lower()
df['Cloud Cover'] = cloud_cover.str.replace('smokey', 'smoky')
# Clean/standardize Weather: lowercase, drop double quotes and fix spellings
# (applied in the order listed).
weather = df['Weather'].str.lower()
for wrong, right in [('"', ''),
                     ('caer', 'clear'),
                     ('smokey', 'smoky'),
                     ('varable', 'variable')]:
    weather = weather.str.replace(wrong, right)
df['Weather'] = weather
# Reorder columns: move the last column (the 'Entry Date Time' column added
# during loading) so that it follows the first three columns.
cols = list(df.columns)
newcols = [*cols[:3], cols[-1], *cols[3:-1]]
df = df[newcols]
def parse_latlon(x):
    """Convert a latitude/longitude string into signed decimal degrees.

    Accepted forms, after the degree sign, a stray 'Â', apostrophes, the
    text '1/2' and the text '(DR)' have been replaced by spaces:

    * ``degrees minutes seconds hemisphere``
    * ``degrees minutes hemisphere``
    * ``degrees hemisphere``

    The word 'equator' (any letter case) maps to 0.0.

    Parameters
    ----------
    x : str or missing value
        Raw position text. Missing values (as detected by ``pd.isna``) are
        returned unchanged.

    Returns
    -------
    float
        Decimal degrees, negative when the hemisphere token is S or W.
        NaN is returned when the text splits into an unexpected number of
        tokens.

    Raises
    ------
    ValueError
        If a degrees/minutes/seconds token is not a valid number.
    """
    if pd.isna(x):
        return x
    if x.lower() == 'equator':
        return 0.0
    # NOTE(review): replacing '1/2' with a space discards a half-unit written
    # in the log instead of adding it to the value -- confirm this is intended.
    for char in ['Â', 'º', "'", '1/2', '(DR)']:
        x = x.replace(char, ' ')
    tokens = x.split()
    # Builtin float is used instead of np.float, an alias that has been
    # removed from NumPy.
    if len(tokens) == 4:
        degrees, minutes, seconds, hem = tokens
        pos = float(degrees) + (float(minutes) + float(seconds) / 60.) / 60.
    elif len(tokens) == 3:
        degrees, minutes, hem = tokens
        pos = float(degrees) + float(minutes) / 60.
    elif len(tokens) == 2:
        degrees, hem = tokens
        pos = float(degrees)
    else:
        return np.nan
    # Southern and western hemispheres are negative.
    if hem.upper() in ('S', 'W'):
        pos = -pos
    return pos
# Convert both position columns from text to signed decimal degrees.
for coord in ['Latitude', 'Longitude']:
    df[coord] = df[coord].map(parse_latlon)
# write clean data to CSV file (without the index column)
clean_csv_path = 'data/log-entries-export-with-text-blocks-2020-01-19-clean.csv'
df.to_csv(clean_csv_path, index=False)
# Print a summary of the columns, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5209 entries, 0 to 5208 Data columns (total 32 columns): ID 5209 non-null int32 LogBook ID 5206 non-null object Page 2785 non-null float64 Entry Date Time 5205 non-null datetime64[ns] Latitude 3243 non-null float64 Longitude 2740 non-null float64 Depth 100 non-null float64 Depth Unit 128 non-null object Bottom 1 non-null object Current 11 non-null object Landmark 2086 non-null object Ship Heading/Course 2704 non-null object Wind Direction 4394 non-null object Wind Speed/Force 4821 non-null object Sea State 510 non-null object Cloud Cover 430 non-null object Weather 2243 non-null object 2. Ship Heading/Course 1976 non-null object 2. Wind Direction 2434 non-null object 2. Wind Speed/Force 2778 non-null object 2. Sea State 212 non-null object 2. Cloud Cover 123 non-null object 2. Weather 1114 non-null object 3. Ship Heading 715 non-null object 3. Wind Direction 993 non-null object 3. Wind Speed/Force 1124 non-null object 3. Sea State 110 non-null object 3. Cloud Cover 124 non-null object 3. Weather 617 non-null object Ship Sightings 222 non-null object Instrumental Observations 156 non-null object Miscellaneous Observations 581 non-null object dtypes: datetime64[ns](1), float64(4), int32(1), object(26) memory usage: 1.3+ MB
# Plot every log entry position on a global Robinson-projection map, using
# one marker series (and legend entry) per year.
fig, ax = plt.subplots(figsize=(9.5, 9), subplot_kw={'projection': ccrs.Robinson()})
_ = ax.set_global()
_ = ax.coastlines(linewidth=0.5)
_ = ax.add_feature(cfeature.LAND, facecolor='#b0b0b0')
entry_year = df['Entry Date Time'].dt.year
# Entries without a timestamp have a NaN year. They are dropped before
# sorting: NaN does not compare with numbers, so it makes the sort order
# unreliable, and it would otherwise add an empty 'nan' series to the legend.
for yr in sorted(entry_year.dropna().unique()):
    work = df[entry_year == yr]
    # Positions are plain lon/lat degrees, hence the PlateCarree transform.
    _ = ax.plot(work.Longitude, work.Latitude, '+', transform=ccrs.PlateCarree(),
                label='{:.0f}'.format(yr))
# Place the legend outside the axes (position given in axes coordinates).
_ = ax.legend(loc=(1.01, -0.475))
# List the distinct non-missing values of each cleaned text column, sorted,
# as a check of the cleaning steps.
for col in ['Wind Direction', 'Wind Speed/Force', 'Sea State', 'Cloud Cover', 'Weather']:
    print(col + ':')
    distinct = np.sort(df[col].dropna().unique())
    print(distinct, '\n')
Wind Direction: ['60' 'E' 'E NE' 'E SE' 'E and SE' 'E by N' 'E by S' 'E to E SE' 'E to ESE' 'E to NE' 'E to NNE' 'E to S' 'E to SE' 'E to W' 'E to W by N' 'E to W by S' 'ENE' 'ENE to NE' 'ENE to SE' 'ESE' 'ESE to E' 'ESE to SE' 'ESE to SSE' 'ESE to W' 'ESE to W by S' 'N' 'N NE' 'N NW' 'N NW to N NE' 'N NW to NE' 'N by E' 'N by E to E by S' 'N by E to NNW' 'N by W' 'N to N NW' 'N to NE' 'N to SE' 'N to SW' 'N to Sw' 'N to W' 'N to W NW' 'N to WNW' 'N, then NW' 'NE' 'NE by E' 'NE by N' 'NE to E NE' 'NE to ENE' 'NE to NNW' 'NE to NW' 'NE to NW & then to SW' 'NE to W' 'NNE' 'NNE to NNW' 'NNW' 'NNW to SW' 'NNW to W' 'NW' 'NW and NW by N' 'NW by N' 'NW by W' 'NW to SW' 'NW to WNW' 'S' 'S & E & NE' 'S SE' 'S SW' 'S by E' 'S by W' 'S heading into E' 'S to E by N' 'S to ESE' 'S to SE' 'S to SSE' 'S to SSW' 'S to SW' 'S to W by S' 'SE' 'SE by E' 'SE by ESE' 'SE by S' 'SE to E' 'SE to E by S' 'SE to ESE' 'SE to N' 'SE to NE' 'SE to S SE' 'SE to SSE' 'SE to SSE to W' 'SE to SW' 'SE to W by N' 'SE to WNW' 'SSE' 'SSE to NE' 'SSE to SE' 'SSE to SSW' 'SSSE to SW' 'SSW' 'SSW to SSE' 'SW' 'SW and SSE' 'SW by S' 'SW by W' 'SW to NE' 'SW to S' 'SW to SE' 'SW to SSW' 'SW to WSW' 'W' 'W NW' 'W NW to W' 'W SW' 'W SW to SW' 'W [?; none recorded]' 'W and NW' 'W and W by N' 'W by N' 'W by S' 'W then SW' 'W to N' 'W to NW by W' 'W to S' 'W to S SW' 'W to SW' 'W to W NW' 'W to WNW' 'WNW' 'WNW and NW' 'WSW' 'WSW to S' 'WSW to SW' 'calm' 'off shore' 'stalled to W & SW' 'the land' 'to E' 'to SE by SW' 'to WSW' 'variable' 'variable ENE to NNE' 'variable ESE to NE' 'variable NE to NW' 'variable NE to SSW' 'variable NNE to S' 'variable NNW to WSW' 'variable NW to WSW' 'variable SE to ENE' 'variable SE to NE' 'variable SSW to SSE' 'variable SW to ENE' 'variable WNW to SW' 'variable WSW to SSW' 'variable mostly from the E' 'variable mostly from the N' 'variable mostly from the S'] Wind Speed/Force: ['12' '2 wind b' '3' 'airs' 'blowing a fresh gale' 'blowing a gale' 'blowing a gale of winds' 'blowing 
a heavy gale' 'blowing a moderate gale' 'blowing a strong gale' 'blowing fresh' 'blowing hard' 'blowing strong' 'blows' 'blows fresh' 'blows on' 'blows strong' 'breezed up' 'breezes' 'breezesd up to brisk breezes' 'brisk breezes' 'brisk gales' 'brisk trade' 'brisk trades' 'brisk winds' 'calm' 'calm & baffling' 'calm and light winds' 'calm no winds' 'calms' 'changeable winds' 'double reef topsail breezes' 'fair' 'fair breezes' 'fair winds' 'fine' 'fine breezes' 'fine fresh breezes' 'fine gales' 'fine light winds' 'fine moderate breezes' 'fine trades' 'fine winds' 'fine, moderate winds' 'fresh' 'fresh and light breezes' 'fresh and light winds' 'fresh breezes' 'fresh gale' 'fresh gales' 'fresh land breezes' 'fresh trades' 'fresh winds' 'gale' 'gale continued' 'gale increasing' 'gale moderated' 'gale moderated a little' 'gale moderated some' 'gale, moderated a little' 'gales' 'gentle' 'gentle breezes' 'gentle gale' 'gentle gales' 'gentle winds' 'good breezes' 'heavy breezes' 'heavy gale' 'heavy gales' 'heavy winds' 'high winds' 'increased to a gale' 'light' 'light & variable' 'light air' 'light airs' 'light airs & calm' 'light airs & calms' 'light airs and calms' 'light airs and variable' 'light airs and very baffling' 'light airs, with calms' 'light airs/calms' 'light and baffling winds' 'light baffling' 'light baffling airs' 'light baffling winds' 'light breezes' 'light breezes and baffling' 'light gales' 'light sea breezes' 'light tempest' 'light trades' 'light variable winds' 'light variable winds & calms' 'light variable winds and calms' 'light winds' 'light winds & baffling' 'light winds & calms' 'light winds & variable' 'light winds and variable' 'moderate' 'moderate and baffling winds' 'moderate baffling winds' 'moderate breezes' 'moderate gale' 'moderate gales' 'moderate trade winds' 'moderate trades' 'moderate winds' 'moderate winds & baffling' 'moderate winds & variable' 'moderate winds between squalls' 'more moderate' 'pleasant breezes' 'pleasant winds' 
'small breezes' 'small winds' 'smart breezes' 'squall winds' 'squalls' 'strong airs' 'strong blows' 'strong breeze and squally winds' 'strong breezes' 'strong gale' 'strong gale (slowly decreasing)' 'strong gales' 'strong south' 'strong squall' 'strong squalls' 'strong trade' 'strong trade winds' 'strong trades' 'strong wind under double reefed top sails' 'strong wind, heavy squall with a tempest' 'strong winds' 'strong winds & variable' 'strong winds and breezes' 'strong winds and light gales' 'variable' 'variable winds & weather' 'variable winds & weather with calms' 'very fine breezes' 'very fine winds' 'very heavy gale' 'very light air' 'very strong winds' 'wind continued and moderated a little' 'wind died away, shifted suddenly' 'wind increased' 'wind increased to a gale' 'wind light and baffling' 'wind moderated a little' 'winds'] Sea State: ['Easterly swell' 'bad sea from W' 'bad sea on' 'bad swell from NNW' 'big swell' 'calm' 'calm & lufting' 'calms' 'calms from W' 'considerable swell' 'flat' 'heaving' 'heavy' 'heavy SE swell' 'heavy Westerly swell' 'heavy coming from West' 'heavy from SE' 'heavy from SW' 'heavy seas' 'heavy swell' 'heavy swell from NW' 'heavy swell from SW' 'heavy swell from SW to W' 'heavy swell from W' 'heavy swell from the Eastward' 'heavy swell to NE' 'heavy swells' 'high cross sea on' 'high sea' 'high sea on' 'high seas' 'high swell' 'high swell from SW' 'large sea' 'large sea from SW' 'large swell' 'large swell from SSW' 'large swell from SW' 'light chop' 'moderate' 'pleasant' 'quite rugged' 'remarkably rough' 'rough' 'rough Easterly sea' 'rough from SW' 'rough running to the North' 'rough seas' 'rugged' 'rugged sea' 'rugged, heavy sea' 'rugged; bad swell' 'running very high' 'smooth' 'state' 'swell' 'swells from the West' 'tide ripy' 'troublesome' 'unsettled' 'very bad' 'very heavy SE swell' 'very heavy sea from SW' 'very heavy swell' 'very heavy swell from S.' 
'very heavy swell from SE' 'very high SW swell' 'very large sea from S' 'very rough' 'very rough from S' 'very rough from SE and NE' 'very rough sea' 'very rugged' 'very rugged; heavy sea' 'very rugged; heavy swell' 'water greenish' 'water wavy'] Cloud Cover: ['100%' 'clear' 'clear & cloudy' 'cloudy' 'cover' 'fine' 'fog' 'foggy' 'heavy thick fog' 'overcast' 'passing clouds' 'smoky' 'thick' 'thick and cloudy' 'thick and foggy' 'thick clouds' 'thick cloudy' 'thick fog' 'very clear' 'very cloudy' 'very foggy' 'very smoky'] Weather: ['a little squally' 'all sorts' 'bad' 'bad rainy weather' 'bad weather' 'bad weather; strong squalls' 'bad; squally' 'beautiful' 'blowy' 'calm' 'calm & fair' 'calm with fog' 'calm, calm, calm, calm.' 'calm; fine' 'calm; fine; clear' 'calm; hot' 'calm; very hot' 'calm; very warm' 'calms' 'calms & rain' 'calms & some light rain' 'calms at intervals' 'calms; calms' 'calms; rainy' 'cear; fine' 'changeable' 'changeable, with thunder & lightning; much rain' 'clear' 'clear & fine' 'clear weather' 'clear; calm' 'clear; fine' 'cleared up' 'cloudy' 'cloudy smoky weather' 'cloudy weather' 'cloudy; unsettled' 'dark; raining; squalls' 'drizzly' 'drizzly weather' 'drizzly, rainy' 'extra fine' 'fair' 'fair weather' 'fair; rain squalls' 'few light rain squalls' 'fewer rain squalls' 'fine' 'fine & clear' 'fine & hot' 'fine and hot as usual' 'fine and light' 'fine and pleasant' 'fine clear' 'fine dry times' 'fine moderate weather' 'fine pleasant' 'fine showers of rain' 'fine too fine; blast the place. 
[!]' 'fine weather' 'fine with a few light passing rain showers' 'fine, pleasant' 'fine; clear' 'fine; pleasant' 'fine; rain all day' 'fine; some showers' 'fog' 'fog & squalls' 'fog squalls' 'fog; squalls' 'foggy' 'foggy, rainy' 'gales' 'good' 'good weather' 'hail and rain' 'hail squalls' 'hard rain' 'hard weather' 'hazy' 'hazy, smoky weather' 'heavy' 'heavy rain' 'heavy rain and squall' 'heavy rain and squalls' 'heavy rain from se' 'heavy rain squall' 'heavy rain squalls' 'heavy rain squalls from wsw' 'heavy rain; bad weather' 'heavy rains' 'heavy showers of rain' 'heavy squall' 'heavy squall from nw' 'heavy squall from the n' 'heavy squall from w' 'heavy squall from wnw' 'heavy squall from wsw' 'heavy squalls' 'heavy squalls and much rain' 'heavy squalls and rain' 'heavy squalls of rain' 'heavy squalls of wind and rain' 'heavy squalls with rain' 'heavy squalls, rainy' 'heavy swell from s' 'heavy thunder and lightning' 'hot' 'hot -- awful hot.' 'hot as tophet [hell]' 'hot; some rain' 'intermittent calms' 'large swell from ssw' 'light' 'light baflin weather' 'light most of the time' 'light rain' 'light rain squall' 'light rain squalls' 'light squalls & cloudy' 'little rain' 'looking bad' 'misty' 'moderate' 'moderate weather' 'moderate, thick, rainy weather' 'moderated' 'moderates' 'moderates some' 'more moderate' 'much rain' 'nasty' 'nice' 'not as warm as weekend' 'nw' 'one heavy squall' 'one or two squalls' 'one quite smart squall from w' 'overcast' 'passing rain squalls' 'passing rain squalls blowing strong' 'passing squalls' 'pleasant' 'pleasant fair weather' 'pleasant weather' 'plenty of rain' 'rain' 'rain all day' 'rain and all kinds of weather' 'rain some' 'rain squall' 'rain squalls' 'rain squalls; bad weather' 'raining' 'rainy' 'rainy & squally' 'rainy and squally' 'rainy calm' 'rainy squalls' 'rainy squally' 'rainy squally weather' 'rainy weather' 'rainy; squally' 'rainy; thick' 'rainy; thunder & lightning' 'rough' 'rugged' 'rugged; light rain squalls' 's' 
'se' 'several heavy squalls' 'several squalls, rain' 'shipped much water' 'showers of rain' 'small rain showers' 'small showers of rain' 'small squalls of rain' 'smoky' 'smoky weather' 'smoky; hazy' 'smoky; thunder' 'some light rain' 'some rain' 'some rain squalls' 'some rains' 'some squalls' 'some squally' 'squall of rain' 'squall with thunder' 'squalls' 'squalls and rain' 'squalls and rain from s' 'squalls and some rain' 'squalls and thick weather' 'squalls between some rain' 'squalls from nw' 'squalls of rain' 'squalls with rain' 'squalls with some rain' 'squalls; rain' 'squalls; rain; bad weather' 'squally' 'squally & much rain' 'squally & rain' 'squally & rainy' 'squally and rainy' 'squally and sharp lightning' 'squally at sunset' 'squally looking' 'squally rainy' 'squally with heavy falls of rain, thunder & lightning' 'squally with much rain' 'squally with passing rain squalls' 'squally with rain' 'squally with some rain' 'squally, plenty of rain' 'squally, rainy, bad weather' 'squally, thunder and lightning, rain' 'squally; bad weather' 'squally; cloudy' 'squally; cloudy; rainy' 'squally; heavy rains; thunder' 'squally; rainy' 'strong gale' 'strong gales' 'strong indications of a gale' 'strong squall from the ne' 'strong squalls' 'strong squalls; hard rain' 'thick' 'thick & cloudy' 'thick and hazy' 'thick and rainy' 'thick and rough' 'thick and smoky' 'thick and squally' 'thick and squally, weather bad' 'thick cloudy weather' 'thick fog' 'thick foggy' 'thick foggy weather' 'thick rain' 'thick smoky weather' 'thick smonky?' 
'thick squally' 'thick weather' 'thick with rain' 'thick, bad weather' 'thick, rainy weather' 'thick, rainy; bad weather' 'thick, smoky' 'thick; raining' 'thick; raining; squally' 'thick; rainy' 'thick; smoky' 'thick; squally' 'thunder & lightning; some rain' 'thunder and lightning' 'thunder, lightning, and rain' 'variable' 'variable with some rain squalls' 'very bad weather' 'very dark weather' 'very fair' 'very fair & pleasant' 'very fine' 'very fine & pleasant weather' 'very hazy' 'very heavy rain' 'very heavy squall from nw' 'very heavy squalls' 'very hot' 'very pleasant' 'very rainy' 'very rugged' 'very smoky' 'very squally' 'very thick' 'very thick; hazy' 'very warm' 'warm' 'weather' 'weather (rain?)' 'wind moderating and backing']