create
or replace function pysal_skater(
tablename TEXT,
geometry TEXT,
col TEXT,
id_col TEXT,
n_clusters INT,
floor INT
) returns table (id TEXT, col FLOAT, regions INT, geometry TEXT)
AS $$
import spopt
import libpysal as lps
import json
import numpy as np
import pandas as pd
import geopandas as gpd
from sklearn.metrics import pairwise as skm
import numpy
from shapely import wkt
neighbors = plpy.execute(f'''
select
json_object_agg(b.{ id_col }, a.neighbors) as neighbors
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col } :: text) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geometry }, b.{ geometry })
and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
''')
weights = plpy.execute(f'''
select
json_object_agg(b.{ id_col }, a.weights) as weights
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geometry }, b.{ geometry })
and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
''')
ids = []
for i in json.loads(weights[0]['weights']):
ids.append(i)
w = lps.weights.W(json.loads(neighbors[0]['neighbors']), json.loads(weights[0]['weights']), silence_warnings = True, id_order=ids)
w.transform='r'
to_gdf = plpy.execute(f'''
select
st_astext(st_transform({ geometry }, 4326)) as geometry,
{ col } :: numeric as col,
{ id_col } as id
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geometry }, b.{ geometry })
and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
''')
gdf_data = []
for i in to_gdf:
gdf_data.append(i)
gdf = gpd.GeoDataFrame(gdf_data)
spanning_forest_kwds = dict(
dissimilarity=skm.manhattan_distances,
affinity=None,
reduction=numpy.sum,
center=numpy.mean,
verbose=False
)
model = spopt.region.Skater(
gdf,
w,
['col'],
n_clusters=n_clusters,
floor=floor,
trace=False,
islands='increase',
spanning_forest_kwds=spanning_forest_kwds
)
model.solve()
gdf['regions'] = model.labels_
return gdf.to_dict(orient = 'records')
$$
LANGUAGE 'plpython3u';