-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_climate_data_bad.py
More file actions
67 lines (55 loc) · 2.44 KB
/
plot_climate_data_bad.py
File metadata and controls
67 lines (55 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from io import StringIO
import matplotlib.pyplot as plt
import os
# The project root is the parent of the directory that contains this script.
# Input files are read from <root>/data and outputs go to <root>/results.
_project_root = os.path.dirname(os.path.dirname(__file__))
data_Dir = os.path.join(_project_root, 'data')
results_Dir = os.path.join(_project_root, 'results')

# Create both directories the same way. exist_ok=True makes the call a no-op
# when the directory is already there, without a separate (race-prone)
# existence check or a swallowed exception.
os.makedirs(results_Dir, exist_ok=True)
os.makedirs(data_Dir, exist_ok=True)
import csv
def ProcessFile(filename) -> None:
    """Summarise and plot the ``tmax`` column of one station data file.

    For a ``.txt`` file in ``data_Dir`` this:

    * appends a ``<location>,<mean of tmax>`` row to the module-level file
      object ``f``, which must already be open for writing when this
      function is called;
    * saves a line plot of ``tmax`` as a PNG in ``results_Dir``.

    The location is ``filename`` with ``'data.txt'`` removed.

    Args:
        filename: Name of a file inside ``data_Dir``. Names that do not end
            in ``.txt`` are printed and then ignored.

    Returns:
        None. All results are written to disk.
    """
    print(filename)
    if not filename.endswith('.txt'):
        # Ignore non-data files
        return

    # Imported after the guard so ignoring a file never needs pandas.
    import pandas as pd

    # Column names, and the missing-value marker used by each data column.
    column_names = ['year', 'month', 'tmax', 'tmin', 'frost_days', 'rain_mm', 'sun']
    value_columns = column_names[2:]
    missing_markers = {name: '---' for name in value_columns}

    # Build an in-memory copy of the file without any "Site closed" lines
    # (present in some files, e.g. cwmystwythdata.txt).
    with open(os.path.join(data_Dir, filename), 'r') as source:
        kept_lines = [
            line for line in source
            if 'site closed' not in line.casefold()
        ]
    cleaned_pseudo_file = StringIO(''.join(kept_lines))

    # Skip the first 8 lines (metadata). Rows pandas cannot fit to the
    # expected columns are dropped via on_bad_lines='skip'; per the original
    # author's note this is what removes the trailing "provisional" rows.
    df = pd.read_csv(
        cleaned_pseudo_file,
        sep=r'\s+',
        skiprows=8,
        names=column_names,
        na_values=missing_markers,
        on_bad_lines='skip',
    )
    print(len(df))

    # Index by date, using the first day of each month.
    df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))
    df = df.set_index('date').drop(columns=['year', 'month'])

    # Some data values have '*' or '#' appended to them.
    # Force these to numeric values, giving NaN where that isn't possible.
    for column in value_columns:
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Computed once and reused for the CSV row and the plot title. Building
    # the row from plain names also avoids nesting double quotes inside a
    # double-quoted f-string, which is a SyntaxError before Python 3.12.
    location = filename.replace('data.txt', '')
    mean_tmax = df['tmax'].mean()
    f.write(f"{location},{mean_tmax}\n")

    ax = df['tmax'].plot(kind='line')
    figure = ax.get_figure()
    try:
        ax.set_title(location)
        ax.set_xlabel('Year')
        ax.set_ylabel('Temperature degC')
        png_path = os.path.join(results_Dir, filename.replace('data.txt', '.png'))
        figure.savefig(png_path, dpi=90)
    finally:
        # Always close this specific figure so a failed save cannot leave it
        # open for the next file's plot to be drawn on.
        plt.close(figure)
# Write the summary CSV: a header row, then one row per data file.
# NOTE: the target must stay bound to the module-level name `f`, because
# ProcessFile writes its rows through that global. The "max_temp" column
# holds the value ProcessFile writes, which is the mean of the tmax column.
# The context manager closes the file even if a ProcessFile call raises.
with open(os.path.join(results_Dir, 'max_temps.csv'), 'wt') as f:
    f.write("location,max_temp\n")
    # os.listdir returns names in arbitrary order; sort so the rows come out
    # in the same order on every run.
    for filename in sorted(os.listdir(data_Dir)):
        ProcessFile(filename)