-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_climate_data_good.py
More file actions
169 lines (130 loc) · 5.17 KB
/
plot_climate_data_good.py
File metadata and controls
169 lines (130 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""
Script to generate figures for climate paper.
Run with `python plot_climate_data_good.py`.
"""
import csv
from io import StringIO
import logging
from pathlib import Path
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import pandas as pd
# Logger shared by every function in this script.
logger = logging.getLogger("climate_paper")

# Input data and generated results live in sibling folders, two levels up
# from this file's own path.
_PROJECT_ROOT = Path(__file__).parent.parent
DATA_DIR = _PROJECT_ROOT / "data"
RESULTS_DIR = _PROJECT_ROOT / "results"

# Demonstration only: shows that module-level code runs on import as well as
# when the file is executed directly.
print(f"Hello from {__file__} body!")
def plot_climate_paper_figs_and_csv(data_dir: Path, work_dir: Path) -> None:
    """
    Generate the per-station plots and the summary CSV for the paper.

    Every file in data_dir whose name matches ``*data.txt`` is read as a
    Met Office station file. For each file that can be read, a .png plot of
    maximum temperature is saved and the mean maximum temperature is
    recorded. The recorded means are then written to ``max_temps.csv``.
    All output files are stored in work_dir.

    Files that cannot be read are logged (with traceback) and skipped.
    """
    mean_max_temps: list[dict[str, str | float]] = []  # one row per station

    # Sorted so the CSV rows come out in the same order on every run rather
    # than in whatever order the directory listing happens to yield.
    for data_file in sorted(data_dir.glob("*data.txt")):
        logger.info("Processing %s", data_file.name)

        try:
            station_data = read_metoffice_file(data_file)
        except Exception:
            # Best effort: one unreadable file must not abort the whole run.
            logger.exception("Failed to read %s", data_file)
            continue

        station_name = get_station_name(data_file)
        max_mean_temp = calculate_mean_maximum_temperature(station_data)
        mean_max_temps.append(
            {
                "location": station_name,
                "max_temp": max_mean_temp,
            }
        )

        # Pass work_dir explicitly; otherwise the plot falls back to the
        # default results directory and ignores the caller's choice.
        plot_max_temp_png(station_data, station_name, work_dir)

    csv_file = work_dir / "max_temps.csv"
    write_max_temps_csv_file(mean_max_temps, csv_file)
def read_metoffice_file(data_file: Path) -> pd.DataFrame:
    """
    Load a Met Office Historic Station data file as a Pandas dataframe.

    The returned frame is indexed by ``date`` (the first day of each row's
    year/month) and holds the columns tmax, tmin, frost_days, rain_mm and
    sun as numeric values.
    """
    value_columns = ['tmax', 'tmin', 'frost_days', 'rain_mm', 'sun']
    all_columns = ['year', 'month', *value_columns]
    # '---' marks a missing measurement in each of the value columns.
    missing_markers = {column: '---' for column in value_columns}

    cleaned_text = _preprocess_metoffice_file(data_file)

    # The first 7 lines are metadata. Malformed rows are skipped; per the
    # original author's note this is what drops the "provisional" data at
    # the end of the file.
    frame = pd.read_csv(
        cleaned_text,
        names=all_columns,
        sep=r"\s+",
        skiprows=7,
        na_values=missing_markers,
        on_bad_lines="skip",
    )

    # Replace the separate year/month columns with a single date index.
    first_of_month = pd.to_datetime(frame[['year', 'month']].assign(day=1))
    frame = (
        frame.assign(date=first_of_month)
        .set_index('date')
        .drop(columns=['year', 'month'])
    )

    # Some values carry a trailing '*' or '#'. Anything that cannot be
    # parsed as a number becomes NaN.
    for column in value_columns:
        frame[column] = pd.to_numeric(frame[column], errors='coerce')

    logger.debug("%s rows loaded", len(frame))
    return frame
def get_station_name(data_file: Path) -> str:
    """
    Return the station name encoded in a data file's name.

    The station name is the file name with its trailing ``data.txt``
    removed, e.g. ``heathrowdata.txt`` gives ``heathrow``. Only the suffix
    is stripped, so any earlier occurrence of ``data.txt`` inside the name
    is preserved. A name that does not end in ``data.txt`` is returned
    unchanged.
    """
    return data_file.name.removesuffix("data.txt")
def calculate_mean_maximum_temperature(station_data: pd.DataFrame) -> float:
    """
    Return the mean of the ``tmax`` column of station_data.

    The value is also logged at debug level.
    """
    tmax_series = station_data['tmax']
    tmax_mean = tmax_series.mean()
    logger.debug("Mean maximum temperature: %s", tmax_mean)
    return tmax_mean
def plot_max_temp_png(
    station_data: pd.DataFrame, station_name: str, work_dir: Path = RESULTS_DIR
) -> None:
    """
    Generate a .png file in the working directory with a plot of max temp.

    The image is written to ``<work_dir>/<station_name>.png`` at 150 dpi.
    The figure is closed afterwards, whether or not saving succeeded.
    """
    fig = plot_max_temp_figure(station_data, station_name)
    try:
        fig.savefig(f"{work_dir / station_name}.png", dpi=150)
    finally:
        # Close even when savefig raises, so a failed save does not leave
        # the figure open.
        plt.close(fig)
def plot_max_temp_figure(station_data: pd.DataFrame, station_name: str) -> Figure:
    """
    Build a line plot of the ``tmax`` column, titled with the station name.

    The figure is returned without being saved or closed.
    """
    figure, axes = plt.subplots()
    station_data['tmax'].plot(kind='line', ax=axes)
    axes.set(title=station_name, xlabel='Year', ylabel='Temperature degC')
    return figure
def write_max_temps_csv_file(
    mean_max_temp_data: list[dict[str, str | float]], csv_file: Path
):
    """
    Write the mean maximum temperature rows to csv_file with the `csv` module.

    The file gets a ``location,max_temp`` header followed by one line per
    entry of mean_max_temp_data, and is encoded as UTF-8.
    """
    logger.info("Writing CSV data to %s", csv_file)

    # newline='' stops Windows adding extra linebreaks
    with open(csv_file, "wt", newline='', encoding="utf-8") as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=["location", "max_temp"])
        csv_writer.writeheader()
        for row in mean_max_temp_data:
            csv_writer.writerow(row)
def _preprocess_metoffice_file(data_file: Path) -> StringIO:
    """
    Create a file-like StringIO object holding a cleaned copy of data_file.

    Any line containing the text "site closed" (compared case-insensitively)
    is dropped, e.g. from cwmystwythdata.txt. All other lines are kept
    unchanged and in their original order.
    """
    logger.debug("Preprocessing %s", data_file)

    # NOTE(review): the file is opened with the platform default encoding;
    # confirm whether an explicit encoding should be given.
    with open(data_file, "r") as input_file:
        clean_lines = [
            line for line in input_file if "site closed" not in line.casefold()
        ]

    return StringIO("".join(clean_lines))
# Demonstration only: shows the value of __name__ both on import and when
# the file is run directly.
print(f"'__name__' is currently: '{__name__}'")


if __name__ == "__main__":
    # Everything below is skipped when the module is imported.
    print(f"{__file__} is being run as a script!")

    logging.basicConfig(
        format="%(name)s - %(levelname)s: %(message)s",
        level=logging.INFO,
    )

    # Make sure the output directory exists before anything is written.
    RESULTS_DIR.mkdir(exist_ok=True)
    plot_climate_paper_figs_and_csv(data_dir=DATA_DIR, work_dir=RESULTS_DIR)