mikonvergence committed on
Commit
8cc8f31
·
verified ·
1 Parent(s): 744959e

Upload 4 files

Browse files
helpers/functional.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fsspec.parquet import open_parquet_file
2
+ import fsspec
3
+ import pyarrow.parquet as pq
4
+ from .grid import *
5
+ import pandas as pd
6
+ from io import BytesIO
7
+ import os
8
+ from PIL import Image
9
+
10
# GLOBAL VARIABLES
# Dataset whose remote metadata table is used when no local copy is present.
# Defined unconditionally so the name exists regardless of which branch runs.
DATASET_NAME = 'Major-TOM/Core-S2L2A'

if os.path.isfile('metadata.parquet'):
    # Prefer a metadata file found in the current working directory
    meta_path = 'metadata.parquet'
else:
    meta_path = 'https://huggingface.co/datasets/{}/resolve/main/metadata.parquet'.format(DATASET_NAME)

# NOTE: both objects are built at import time; reading the metadata table may
# download it when meta_path is a URL.
grid = Grid(10, latitude_range=(-90,90), longitude_range=(-180,180))
df = pd.read_parquet(meta_path)
19
+
20
+ # HELPER FUNCTIONS
21
def _signed_grid_index(token, positive, negative):
    """Convert one grid token such as '12U' into a signed integer.

    `positive` / `negative` are the direction letters that map to +1 / -1.
    """
    direction = token[-1:]
    if direction == positive:
        sign = 1
    elif direction == negative:
        sign = -1
    else:
        raise ValueError(
            'Invalid grid token {!r}: expected a number followed by {!r} or {!r}'.format(
                token, positive, negative))
    return sign * int(token[:-1])


def gridcell2ints(grid_string):
    """Parse a grid cell name like '12U_34R' into signed integers.

    Parameters
    ----------
    grid_string : str
        Cell name of the form '<n><U|D>_<m><R|L>'.

    Returns
    -------
    tuple of int
        (up, right): positive for 'U' / 'R', negative for 'D' / 'L'.

    Raises
    ------
    ValueError
        If a numeric part is not an integer or a direction letter is not one
        of the expected ones (previously any unknown letter was silently
        treated as negative).
    IndexError
        If the string contains no '_' separator.
    """
    parts = grid_string.split('_')
    up = _signed_grid_index(parts[0], 'U', 'D')     # +ve if up
    right = _signed_grid_index(parts[1], 'R', 'L')  # +ve if R

    return up, right
26
+
27
def row2image(parquet_url, parquet_row, fullrow_read=True):
    """Load the thumbnail stored at a given row group of a parquet file.

    Parameters
    ----------
    parquet_url : str
        Location of the parquet file (anything fsspec can open).
    parquet_row : int
        Index passed to `ParquetFile.read_row_group`; the first entry of that
        row group's 'thumbnail' column is decoded.
        # NOTE(review): presumably one sample per row group - confirm.
    fullrow_read : bool, optional
        If True, open the file through `fsspec.open`; otherwise use
        `fsspec.parquet.open_parquet_file` restricted to the 'thumbnail' column.

    Returns
    -------
    PIL.Image.Image
        Image opened from the thumbnail bytes.
    """
    if fullrow_read:
        # option 1
        source = fsspec.open(parquet_url)
    else:
        # option 2
        source = open_parquet_file(parquet_url, columns=["thumbnail"])

    # Both sources are context managers; pq.ParquetFile does not close a file
    # object it did not open itself, so the handle is closed explicitly here.
    with source as handle:
        with pq.ParquetFile(handle) as pf:
            row_group = pf.read_row_group(parquet_row, columns=['thumbnail'])

    # The bytes are fully in memory at this point, so the lazily-decoded image
    # does not depend on the (now closed) remote handle.
    stream = BytesIO(row_group['thumbnail'][0].as_py())
    return Image.open(stream)
42
+
43
def row2s2(parquet_url, parquet_row, s2_bands=("B04", "B03", "B02")):
    """Read selected band columns from one row group of a parquet file.

    Parameters
    ----------
    parquet_url : str
        Location of the parquet file.
    parquet_row : int
        Index passed to `ParquetFile.read_row_group`.
    s2_bands : sequence of str, optional
        Column names to read. The default is an immutable tuple so it cannot
        be altered between calls; any sequence (including a list) is accepted.

    Returns
    -------
    pyarrow.Table
        The requested columns of the selected row group.
    """
    bands = list(s2_bands)  # downstream APIs are given a plain list, as before
    with open_parquet_file(parquet_url, columns=bands) as f:
        with pq.ParquetFile(f) as pf:
            row_group = pf.read_row_group(parquet_row, columns=bands)

    return row_group
49
+
50
def cell2row(grid_string, meta_df, return_row = False):
    """Look up the parquet location of a grid cell in the metadata table.

    Parameters
    ----------
    grid_string : str
        Grid cell name, parsed with `gridcell2ints`.
    meta_df : pandas.DataFrame
        Metadata table queried on its `grid_row_u` and `grid_col_r` columns;
        `parquet_url` and `parquet_row` are read from the match.
    return_row : bool, optional
        If True, also return the matching metadata rows.

    Returns
    -------
    tuple or None
        (parquet_url, parquet_row) or (parquet_url, parquet_row, matches);
        None when no metadata row matches the cell.
    """
    row_U, col_R = gridcell2ints(grid_string)
    matches = meta_df.query('grid_row_u == {} & grid_col_r == {}'.format(row_U, col_R))

    # Guard clause: nothing stored for this cell
    if matches.empty:
        return None

    # .item() requires exactly one matching row
    url = matches.parquet_url.item()
    row_number = matches.parquet_row.item()

    if return_row:
        return url, row_number, matches
    return url, row_number
61
+
62
def map_to_image(map, return_centre=False):
    """Fetch the thumbnail of the grid cell under the centre of a map view.

    Parameters
    ----------
    map : object
        Map object exposing `get_bbox()`; entries 1 and 3 of the result are
        averaged into the latitude and entries 0 and 2 into the longitude.
    return_centre : bool, optional
        If True, also return the (centre_lat, centre_lon) of the matched
        metadata row.

    Returns
    -------
    image, (image, (lat, lon)) or None
        None when the cell has no entry in the module-level metadata table.
    """
    # 1. get bounds and the midpoint of the view
    bbox = map.get_bbox()
    mid_lat = (bbox[3]+bbox[1])/2
    mid_lon = (bbox[2]+bbox[0])/2

    # 2. translate coordinate to major-tom tile
    rows, cols = grid.latlon2rowcol([mid_lat], [mid_lon])

    # 3. translate major-tom cell to row in parquet
    match = cell2row("{}_{}".format(rows[0],cols[0]), df, return_row = True)
    if match is None:
        return None

    # 4. acquire image and the centre of the matched sample
    parquet_url, parquet_row, meta_row = match
    img = row2image(parquet_url, parquet_row)
    lat, lon = meta_row.centre_lat.item(), meta_row.centre_lon.item()

    if return_centre:
        return img, (lat,lon)
    return img
helpers/grid.py CHANGED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import math
3
+ import pandas as pd
4
+ import geopandas as gpd
5
+ from shapely.geometry import LineString, Polygon
6
+ from tqdm import tqdm
7
+
8
+
9
+
10
class Grid():
    """Grid of points over latitude/longitude, spaced roughly `dist` km apart.

    Rows are lines of constant latitude named '<n>U' (at or north of 0) or
    '<n>D' (south of 0); within each row, columns are named '<n>R' (at or east
    of 0) or '<n>L' (west of 0). Each grid point is treated as the bottom-left
    corner of its cell.

    Attributes set by the constructor:
    rows, lats      -- row names and their latitudes, south to north
    points          -- GeoDataFrame of all grid points
    points_by_row   -- list of per-row GeoDataFrames, indexed by row_idx
    """

    RADIUS_EQUATOR = 6378.137 # km

    def __init__(self,dist,latitude_range=(-85,85),longitude_range=(-180,180),utm_definition='bottomleft'):
        """Build the grid.

        dist            -- target spacing in km
        latitude_range  -- (min, max) latitudes kept, inclusive
        longitude_range -- (min, max) longitudes kept, inclusive
        utm_definition  -- 'bottomleft' or 'center': which location of the
                           cell decides its UTM zone
        """
        self.dist = dist
        self.latitude_range = latitude_range
        self.longitude_range = longitude_range
        self.utm_definition = utm_definition
        self.rows,self.lats = self.get_rows()
        self.points, self.points_by_row = self.get_points()

    def get_rows(self):
        """Return (row names, latitudes) of the grid rows inside latitude_range."""

        # Define set of latitudes to use, based on the grid distance
        arc_pole_to_pole = math.pi * self.RADIUS_EQUATOR
        num_divisions_in_hemisphere = math.ceil(arc_pole_to_pole / self.dist)

        latitudes = np.linspace(-90, 90, num_divisions_in_hemisphere+1)[:-1]
        # Shift the sequence so that it contains latitude 0 (the first sample, -90, maps to 0)
        latitudes = np.mod(latitudes, 180) - 90

        # order should be from south to north
        latitudes = np.sort(latitudes)

        zeroth_row = np.searchsorted(latitudes,0)

        # From 0U-NU and 1D-ND
        rows = [None] * len(latitudes)
        rows[zeroth_row:] = [f'{i}U' for i in range(len(latitudes)-zeroth_row)]
        rows[:zeroth_row] = [f'{abs(i-zeroth_row)}D' for i in range(zeroth_row)]

        # bound to range
        idxs = (latitudes>=self.latitude_range[0]) * (latitudes<=self.latitude_range[1])
        rows,latitudes = np.array(rows), np.array(latitudes)
        rows,latitudes = rows[idxs],latitudes[idxs]

        return rows,latitudes

    def get_circumference_at_latitude(self,lat):
        """Circumference (km) of the cross-section of a sphere at latitude `lat` (degrees)."""

        radius_at_lat = self.RADIUS_EQUATOR * math.cos(lat * math.pi / 180)
        circumference = 2 * math.pi * radius_at_lat

        return circumference

    def subdivide_circumference(self,lat,return_cols=False):
        """Longitudes that split the circle at `lat` into equal parts as close as possible to dist.

        Returns the sorted longitudes, or (column names, longitudes) when
        return_cols is True.
        """

        circumference = self.get_circumference_at_latitude(lat)
        num_divisions = math.ceil(circumference / self.dist)
        longitudes = np.linspace(-180,180, num_divisions+1)[:-1]
        # Shift so that longitude 0 is part of the sequence (the first sample, -180, maps to 0)
        longitudes = np.mod(longitudes, 360) - 180
        longitudes = np.sort(longitudes)

        if return_cols:
            cols = [None] * len(longitudes)
            zeroth_idx = np.where(longitudes==0)[0][0]
            cols[zeroth_idx:] = [f'{i}R' for i in range(len(longitudes)-zeroth_idx)]
            cols[:zeroth_idx] = [f'{abs(i-zeroth_idx)}L' for i in range(zeroth_idx)]
            return np.array(cols),np.array(longitudes)

        return np.array(longitudes)

    def get_points(self):
        """Build the grid points.

        Returns (points, points_by_row): one GeoDataFrame with every point and
        a list holding one GeoDataFrame per row (same order as self.rows).
        Raises ValueError for an unknown utm_definition.
        """

        points_by_row = [None]*len(self.rows)
        for r_idx,(r,lat) in enumerate(zip(self.rows,self.lats)):
            point_names,grid_row_names,grid_col_names,grid_row_idx,grid_col_idx,grid_lats,grid_lons,utm_zones,epsgs = [],[],[],[],[],[],[],[],[]
            cols,lons = self.subdivide_circumference(lat,return_cols=True)

            cols,lons = self.filter_longitude(cols,lons)
            for c_idx,(c,lon) in enumerate(zip(cols,lons)):
                point_names.append(f'{r}_{c}')
                grid_row_names.append(r)
                grid_col_names.append(c)
                grid_row_idx.append(r_idx)
                grid_col_idx.append(c_idx)
                grid_lats.append(lat)
                grid_lons.append(lon)
                if self.utm_definition == 'bottomleft':
                    utm_zones.append(get_utm_zone_from_latlng([lat,lon]))
                elif self.utm_definition == 'center':
                    # Offset by half a cell, converting metres to degrees (111_120 m per degree)
                    center_lat = lat + (1000*self.dist/2)/111_120
                    center_lon = lon + (1000*self.dist/2)/(111_120*math.cos(center_lat*math.pi/180))
                    utm_zones.append(get_utm_zone_from_latlng([center_lat,center_lon]))
                else:
                    raise ValueError(f'Invalid utm_definition {self.utm_definition}')
                epsgs.append(f'EPSG:{utm_zones[-1]}')

            points_by_row[r_idx] = gpd.GeoDataFrame({
                'name':point_names,
                'row':grid_row_names,
                'col':grid_col_names,
                'row_idx':grid_row_idx,
                'col_idx':grid_col_idx,
                'utm_zone':utm_zones,
                'epsg':epsgs
            },geometry=gpd.points_from_xy(grid_lons,grid_lats))
        points = gpd.GeoDataFrame(pd.concat(points_by_row))
        # points.reset_index(inplace=True,drop=True)
        return points, points_by_row

    def group_points_by_row(self):
        """Make a list of different gdfs for each row, selected from self.points."""
        points_by_row = [None]*len(self.rows)
        for i,row in enumerate(self.rows):
            points_by_row[i] = self.points[self.points.row==row]
        return points_by_row

    def filter_longitude(self,cols,lons):
        """Keep only the (cols, lons) entries whose longitude lies inside longitude_range."""
        idxs = (lons>=self.longitude_range[0]) * (lons<=self.longitude_range[1])
        cols,lons = cols[idxs],lons[idxs]
        return cols,lons

    def latlon2rowcol(self,lats,lons,return_idx=False):
        """
        Convert latitude and longitude to row and column number from the grid

        Returns (rows, cols), or (rows, cols, idx) when return_idx is True,
        where idx holds the index labels of the matching entries in self.points.

        NOTE: np.searchsorted uses side='left', so a coordinate exactly equal
        to a grid value is assigned to the preceding row/column, and a
        coordinate below the first grid value yields index -1 (the last one).
        """
        # Always take bottom left corner of grid cell
        rows = np.searchsorted(self.lats,lats)-1

        # Get the possible points of the grid cells at the given latitude
        possible_points = [self.points_by_row[row] for row in rows]

        # For each point, find the rightmost point that is still to the left of the given longitude
        cols = [poss_points.iloc[np.searchsorted(poss_points.geometry.x,lon)-1].col for poss_points,lon in zip(possible_points,lons)]
        rows = self.rows[rows]

        if return_idx:
            # Get the table index for self.points with each row,col pair in rows, cols
            idx = [self.points[(self.points.row==row) & (self.points.col==col)].index.values[0] for row,col in zip(rows,cols)]
            return rows,cols,idx
        return rows,cols

    def rowcol2latlon(self,rows,cols):
        """Return (lats, lons) of the grid points named by the given row/col name pairs."""
        point_geoms = [self.points.loc[(self.points.row==row) & (self.points.col==col),'geometry'].values[0] for row,col in zip(rows,cols)]
        lats = [point.y for point in point_geoms]
        lons = [point.x for point in point_geoms]
        return lats,lons

    def get_bounded_footprint(self,point,buffer_ratio=0):
        """Polygon footprint of the grid cell for a given point, bounded by the other grid points' cells.

        Grid point defined as bottom-left corner of polygon. Buffer ratio is
        the ratio of the grid cell's width/height to buffer by.

        point -- a record of self.points (needs geometry, row_idx, col_idx)
        """

        bottom,left = point.geometry.y,point.geometry.x
        row_idx = point.row_idx
        col_idx = point.col_idx
        next_row_idx = row_idx+1
        next_col_idx = col_idx+1

        if next_row_idx >= len(self.lats): # If at top row, use difference between top and second-to-top row for height
            height = (self.lats[row_idx] - self.lats[row_idx-1])
            top = self.lats[row_idx] + height
        else:
            top = self.lats[next_row_idx]

        # points_by_row is a list, so it must be indexed with the integer
        # row_idx (indexing it with the row *name* raised TypeError).
        row_points = self.points_by_row[row_idx]

        max_col = len(row_points.col_idx)-1
        if next_col_idx > max_col: # If at rightmost column, use difference between rightmost and second-to-rightmost column for width
            width = (row_points.iloc[col_idx].geometry.x - row_points.iloc[col_idx-1].geometry.x)
            right = row_points.iloc[col_idx].geometry.x + width
        else:
            right = row_points.iloc[next_col_idx].geometry.x

        # Buffer the polygon by the ratio of the grid cell's width/height
        width = right - left
        height = top - bottom

        buffer_horizontal = width * buffer_ratio
        buffer_vertical = height * buffer_ratio

        new_left = left - buffer_horizontal
        new_right = right + buffer_horizontal

        new_bottom = bottom - buffer_vertical
        new_top = top + buffer_vertical

        bbox = Polygon([(new_left,new_bottom),(new_left,new_top),(new_right,new_top),(new_right,new_bottom)])

        return bbox
198
+
199
+
200
def get_utm_zone_from_latlng(latlng):
    """
    Get the EPSG code of the WGS 84 / UTM zone containing a latlng pair.

    Parameters
    ----------
    latlng : List[Union[int, float]]
        [latitude, longitude] in degrees. Lists, tuples and numpy arrays
        are accepted.

    Returns
    -------
    str
        The numeric EPSG code as a string: '326' + zone for the northern
        hemisphere (latitude >= 0), '327' + zone for the southern one, with
        the zone zero-padded to two digits (e.g. '32601', not '3261').

    Raises
    ------
    TypeError
        If latlng is not a list, tuple or numpy array.
    """
    if not isinstance(latlng, (list, tuple, np.ndarray)):
        raise TypeError("latlng must be in the form of a list.")

    lat, lon = latlng[0], latlng[1]

    # Zones are 6 degrees wide, numbered 1..60 starting at longitude -180.
    # The modulo wraps longitude 180 (and beyond) back to zone 1 instead of
    # producing the non-existent zone 61.
    zone = (math.floor((lon + 180) / 6) % 60) + 1
    hemisphere_digit = '7' if lat < 0 else '6'

    return f"32{hemisphere_digit}{zone:02d}"
227
+
228
+
229
if __name__ == '__main__':
    # Demo / visual check: map random coordinates to grid cells and back,
    # then write the connecting segments and the grid itself to GeoJSON files.

    dist = 100
    grid = Grid(dist,latitude_range=(10,70),longitude_range=(-30,60))

    test_lons = np.random.uniform(-20,50,size=(1000))
    test_lats = np.random.uniform(12,68,size=(1000))

    test_rows,test_cols = grid.latlon2rowcol(test_lats,test_lons)
    test_lats2,test_lons2 = grid.rowcol2latlon(test_rows,test_cols)

    print(test_lons[:10])
    print(test_lats[:10])
    print(test_rows[:10])
    print(test_cols[:10])

    # Make line segments from the points to their corresponding grid points
    segments = [
        LineString([(lon, lat), (lon2, lat2)])
        for lon, lat, lon2, lat2 in zip(test_lons, test_lats, test_lons2, test_lats2)
    ]

    lines = gpd.GeoDataFrame(geometry=gpd.GeoSeries(segments))

    lines.to_file(f'testlines_{dist}km.geojson',driver='GeoJSON')
    grid.points.to_file(f'testgrid_{dist}km.geojson',driver='GeoJSON')
helpers/s2l1c_metadata.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e19d451d89510923c4bc3d8acf8c45985903a73c89539e431292d6226a4b5ddc
3
+ size 171721623
helpers/s2l2a_metadata.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a96c80bc43cb841b8400b05e80f4b477453b51a1e6833821333e3c11831e78b
3
+ size 173048695