select
*
from
cb_2018_us_county_500k
where
statefp = '55'
select
id,
st_centroid(geom) as geom
from
cb_2018_us_county_500k
where
statefp = '55'
create
or relace view wi_centroids AS
select
id,
st_centroid(geom) as geom
from
cb_2018_us_county_500k
where
statefp = '55'
select
*
from
cb_2018_us_county_500k
where
statefp = '55'
select
id,
st_centroid(geom) as geom
from
cb_2018_us_county_500k
where
statefp = '55'
create
or relace view wi_centroids AS
select
id,
st_centroid(geom) as geom
from
cb_2018_us_county_500k
where
statefp = '55'
select
name,
-- Use the length() function to find the number of letters, or length of the name
length(name),
-- Use the pg_column_size() function to find the size of the column in bytes
pg_column_size(name),
-- Here we are doing a few things
-- 1. Getting the size of the geom column in bytes using ST_MemSize()
-- 2. Casting the result of ST_MemSize() to a numeric value since
-- ST_MemSize() to format it as a number implicitly
-- 3. Use pg_size_pretty() to format the bytes in human readable sizes like kb or mb
pg_size_pretty(st_memsize(geom) :: numeric)
from
cb_2018_us_county_500k
-- Using order by to order the results of ST_MemSize()
-- from largest to smallest, or descending using desc
order by
st_memsize(geom) desc
select
name,
-- Calculate the total number of points in the geometry using ST_NPoints()
st_npoints(geom) as n_points,
-- Calculate the size of the geometry using ST_MemSize()
st_memsize(geom) as size_bytes,
-- Using the formula we saw, calculate the size of the geometry as: 40 + ( no. of points * 16)
40 + (16 * st_npoints(geom)) as calculated_size
from
cb_2018_us_county_500k
order by
st_memsize(geom) desc
select
name,
-- Use ST_GeometryType to see our geometry type
st_geometrytype(geom) as geom_type,
st_memsize(geom) as size_bytes,
-- Use ST_NumGeometries to see our geometry type
st_numgeometries(geom) as num_geoms,
-- Try and calculate using the formula (no. of geometries * 40) + (no. of points * 16)
(st_numgeometries(geom) * 40) + (16 * st_npoints(geom)) as calculated_size
from
cb_2018_us_county_500k
order by
st_memsize(geom) desc
select
name,
st_geometrytype(geom) as geom_type,
st_memsize(geom) as size_bytes,
st_numgeometries(geom) as num_geoms,
-- Try and calculate using the formula 32 + ((no. of geometries * 16) + (no. of points * 16))
32 + (
(st_numgeometries(geom) * 16) + (16 * st_npoints(geom))
) as calculated_size
from
cb_2018_us_county_500k
order by
st_memsize(geom) desc
select
pc.geom,
pc.postal_code,
count(customers.customer_id)
from
postal_codes pc
join customers using (postal_code)
group by
pc.postal_code
select
*
from
street_centerlines_current
where
date_added between '01-01-2022'
and '06-30-2022'
select
name,
-- Use the length() function to find the number of letters, or length of the name
length(name),
-- Use the pg_column_size() function to find the size of the column in bytes
pg_column_size(name),
-- Here we are doing a few things
-- 1. Getting the size of the geom column in bytes using ST_MemSize()
-- 2. Casting the result of ST_MemSize() to a numeric value since
-- ST_MemSize() to format it as a number implicitly
-- 3. Use pg_size_pretty() to format the bytes in human readable sizes like kb or mb
pg_size_pretty(st_memsize(geom) :: numeric)
from
cb_2018_us_county_500k
-- Using order by to order the results of ST_MemSize()
-- from largest to smallest, or descending using desc
order by
st_memsize(geom) desc
select
name,
-- Calculate the total number of points in the geometry using ST_NPoints()
st_npoints(geom) as n_points,
-- Calculate the size of the geometry using ST_MemSize()
st_memsize(geom) as size_bytes,
-- Using the formula we saw, calculate the size of the geometry as: 40 + ( no. of points * 16)
40 + (16 * st_npoints(geom)) as calculated_size
from
cb_2018_us_county_500k
order by
st_memsize(geom) desc
select
name,
-- Use ST_GeometryType to see our geometry type
st_geometrytype(geom) as geom_type,
st_memsize(geom) as size_bytes,
-- Use ST_NumGeometries to see our geometry type
st_numgeometries(geom) as num_geoms,
-- Try and calculate using the formula (no. of geometries * 40) + (no. of points * 16)
(st_numgeometries(geom) * 40) + (16 * st_npoints(geom)) as calculated_size
from
cb_2018_us_county_500k
order by
st_memsize(geom) desc
select
name,
st_geometrytype(geom) as geom_type,
st_memsize(geom) as size_bytes,
st_numgeometries(geom) as num_geoms,
-- Try and calculate using the formula 32 + ((no. of geometries * 16) + (no. of points * 16))
32 + (
(st_numgeometries(geom) * 16) + (16 * st_npoints(geom))
) as calculated_size
from
cb_2018_us_county_500k
order by
st_memsize(geom) desc
select
pc.geom,
pc.postal_code,
count(customers.customer_id)
from
postal_codes pc
join customers using (postal_code)
group by
pc.postal_code
select
*
from
street_centerlines_current
where
date_added between '01-01-2022'
and '06-30-2022'
insert into
spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext)
values
(
104726,
'ESRI',
104726,
'+proj=longlat +a=6378418.941 +rf=298.257222100883 +no_defs +type=crs',
'GEOGCS["GCS_NAD_1983_HARN_Adj_MN_Hennepin",DATUM["D_NAD_1983_HARN_Adj_MN_Hennepin",SPHEROID["S_GRS_1980_Adj_MN_Hennepin",6378418.941,298.257222100883,AUTHORITY["ESRI","107726"]],AUTHORITY["ESRI","106726"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["ESRI","104726"]]'
);
create view mn_104726 as
select
id,
st_transform(geom, 104726)
from
cb_2018_us_county_500k
where
statefp = '27'
-- First we select your data from the ZIP codes table
-- and aggregate or count the total number of records
-- from the NYC 311 data
select
zips.zipcode,
zips.geom,
count(nyc_311.*) as count
-- Then we join the tables on a common ID, in this case the ZIP code
from
nyc_zips zips
join nyc_311 on nyc_311.incident_zip = zips.zipcode
-- Then we filter using WHERE to the right complaint type
-- and group the results by the ZIP code and geometry
where
nyc_311.complaint_type = 'Illegal Parking'
group by
zips.zipcode,
zips.geom
select
zips.zipcode,
zips.geom,
count(nyc_311.id) as count
from
nyc_zips zips
join nyc_311 on nyc_311.incident_zip = zips.zipcode
where
nyc_311.complaint_type = 'Illegal Parking'
group by
zips.zipcode,
zips.geom
-- In this CTE, which has an alias of "a",
-- we pull the data we need from the nyc_311 data
-- and filter to just the results that match "Illegal Parking"
with a as (
select
id,
incident_zip as zipcode
from
nyc_311
where
nyc_311.complaint_type = 'Illegal Parking'
)
-- We then join the data from our "temporary table" a to the zipcode data
select
zips.zipcode,
zips.geom,
count(a.id) as count
from
nyc_zips zips
join a using (zipcode)
group by
zips.zipcode,
zips.geom
-- Now we have our entire aggregation in the CTE
with a as (
select
count("unique key") as total,
"incident zip" as zipcode
from
nyc_311
where
nyc_311."complaint type" = 'Illegal Parking'
group by
"incident zip"
)
select
zips.zipcode,
zips.geom,
a.total
from
nyc_zips zips
join a using (zipcode)
select
*
from
nyc_311
where
DATE_PART('month', "created date") = 7
and "complaint type" = 'Illegal Fireworks'
select
*
from
nyc_311
where
date_part('month', "created date" :: date) = 7
and "complaint type" = 'Illegal Fireworks'
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=user password=password \
dbname=geo" /Users/matt/Documents/spatial-sql-book/nyc_taxi_yellow_0616-07.parquet \
-nln nyc_taxi_yellow_0616 -lco GEOMETRY_NAME=geom
docker run --rm -v /Users:/Users --network="host" osgeo/gdal
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" \
/Users/mattforrest/Documents/spatial-sql-data/nyc_yellow_taxi_0601_0615_2016.parquet \
-nln nyc_yellow_taxi_0601_0615_2016 -lco GEOMETRY_NAME=geom
create table nyc_311 (
id int primary key,
created_date timestamp,
closed_date timestamp,
agency text,
agency_name text,
complaint_type text,
descriptor text,
location_type text,
incident_zip text,
incident_address text,
street_name text,
cross_street_1 text,
cross_street_2 text,
intersection_street_1 text,
intersection_street_2 text,
address_type text,
city text,
landmark text,
facility_type text,
status text,
due_date timestamp,
resolution_description text,
resolution_action_updated_date timestamp,
community_board text,
bbl text,
borough text,
x_coordinate_planar numeric,
y_coordinate_planar numeric,
open_data_channel_type text,
park_facility_name text,
park_borough text,
vehicle_type text,
taxi_company_borough text,
taxi_pickup_location text,
bridge_highway_name text,
bridge_highway_description text,
road_ramp text,
bridge_highway_segment text,
latitude numeric,
longitude numeric,
location text
)
docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/Building_Footprints.geojson
\ -nln nyc_building_footprints -lco GEOMETRY_NAME=geom
docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/2015_NYC_Tree_Census.geojson \
-nln nyc_2015_tree_census -lco GEOMETRY_NAME=geom
docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/nyc_mappluto_22v3_shp/MapPLUTO.shp \
-nln nyc_mappluto -lco GEOMETRY_NAME=geom \
-nlt MULTIPOLYGON -mapFieldType Real=String
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=user password=password \
dbname=geo" /Users/matt/Documents/spatial-sql-book/nyc_taxi_yellow_0616-07.parquet \
-nln nyc_taxi_yellow_0616 -lco GEOMETRY_NAME=geom
docker run --rm -v /Users:/Users --network="host" osgeo/gdal
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" \
/Users/mattforrest/Documents/spatial-sql-data/nyc_yellow_taxi_0601_0615_2016.parquet \
-nln nyc_yellow_taxi_0601_0615_2016 -lco GEOMETRY_NAME=geom
create table nyc_311 (
id int primary key,
created_date timestamp,
closed_date timestamp,
agency text,
agency_name text,
complaint_type text,
descriptor text,
location_type text,
incident_zip text,
incident_address text,
street_name text,
cross_street_1 text,
cross_street_2 text,
intersection_street_1 text,
intersection_street_2 text,
address_type text,
city text,
landmark text,
facility_type text,
status text,
due_date timestamp,
resolution_description text,
resolution_action_updated_date timestamp,
community_board text,
bbl text,
borough text,
x_coordinate_planar numeric,
y_coordinate_planar numeric,
open_data_channel_type text,
park_facility_name text,
park_borough text,
vehicle_type text,
taxi_company_borough text,
taxi_pickup_location text,
bridge_highway_name text,
bridge_highway_description text,
road_ramp text,
bridge_highway_segment text,
latitude numeric,
longitude numeric,
location text
)
docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/Building_Footprints.geojson
\ -nln nyc_building_footprints -lco GEOMETRY_NAME=geom
docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/2015_NYC_Tree_Census.geojson \
-nln nyc_2015_tree_census -lco GEOMETRY_NAME=geom
docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/nyc_mappluto_22v3_shp/MapPLUTO.shp \
-nln nyc_mappluto -lco GEOMETRY_NAME=geom \
-nlt MULTIPOLYGON -mapFieldType Real=String
select
complaint_type,
location_type,
city
from
nyc_311
limit
5
select
complaint_type,
location_type,
city
from
nyc_311
where
city = 'Brooklyn'
limit
5
select
complaint_type,
location_type,
city,
city = 'BROOKLYN'
from
nyc_311
limit
5
select
spc_common,
nta_name, health
from
nyc_2015_tree_census
limit
5
select
spc_common || ' - ' || health as joined
from
nyc_2015_tree_census
limit
5
select
concat(spc_common, ' - ', health)
from
nyc_2015_tree_census
limit
5
select
spc_common,
left(spc_common, 5) as left,
right(spc_common, 3) as right
from
nyc_2015_tree_census
limit
5
select
spc_common,
initcap(spc_common) as titlecase,
upper(spc_common) as uppercase,
lower(spc_common) as lowercase
from
nyc_2015_tree_census
limit
5
select
spc_common,
replace(spc_common, 'locust', ' locust') as new_text
from
nyc_2015_tree_census
limit
5
select
spc_common,
reverse(spc_common) as backwards
from
nyc_2015_tree_census
limit
5
select
spc_common,
length(spc_common) as how_long
from
nyc_2015_tree_census
limit
5
select
spc_common,
split_part(spc_common, ' ', 2) as tree_group
from
nyc_2015_tree_census
limit
5
select
spc_common,
split_part(
replace(spc_common, 'locust', ' locust'),
' ',
2
) as tree_group
from
nyc_2015_tree_census
limit
5
select
total_amount,
tip_amount,
tip_amount /(total_amount - tip_amount) as tip_percent
from
nyc_yellow_taxi_0601_0615_2016
where
tip_amount > 0
and trip_distance > 2
limit
5
select
pickup_datetime,
dropoff_datetime
from
nyc_yellow_taxi_0601_0615_2016
where
tip_amount > 0
and trip_distance > 2
limit
5
select
to_char(pickup_datetime, 'DD Mon YYYY') as start_date,
to_char(dropoff_datetime, 'DD Mon YYYY') as end_date
from
nyc_yellow_taxi_0601_0615_2016
where
tip_amount > 0
and trip_distance > 2
limit
5
select
to_char(pickup_datetime, 'D Month YYYY') as start_date,
to_char(dropoff_datetime, 'D Month YYYY') as end_date
from
nyc_yellow_taxi_0601_0615_2016
where
tip_amount > 0
and trip_distance > 2
limit
5
select
to_char(pickup_datetime, 'Day, Month FMDDth, YYYY') as start_date,
to_char(dropoff_datetime, 'Day, Month FMDDth, YYYY ') as end_date
from
nyc_yellow_taxi_0601_0615_2016
where
tip_amount > 0
and trip_distance > 2
limit
5
select
dropoff_datetime - pickup_datetime as duration
from
nyc_yellow_taxi_0601_0615_2016
where
tip_amount > 0
and trip_distance > 2
limit
5
select
extract(
dow
from
pickup_datetime
) as day_of_week,
extract(
hour
from
pickup_datetime
) as hour_of_day
from
nyc_yellow_taxi_0601_0615_2016
where
tip_amount > 0
and trip_distance > 2
limit
5
with json_table as (
select
JSON(
'{"first": "Matt",
"last": "Forrest",
"age": 35}'
) as data
)
select
data
from
json_table
with json_table as (
select
JSON(
'{"first": "Matt",
"last": "Forrest",
"age": 35}'
) as data
)
select
data -> 'last'
from
json_table
with json_table as (
select
JSON(
'{
"first": "Matt",
"last": "Forrest",
"age": 35,
"cities": ["Minneapolis", "Madison", "New York City"],
"skills": {"SQL": true, "Python": true, "Java": false}
}'
) as data
)
select
data
from
json_table
with json_table as (
select
JSON(
'{
"first": "Matt",
"last": "Forrest",
"age": 35,
"cities": ["Minneapolis", "Madison", "New York City"],
"skills": {"SQL": true, "Python": true, "Java": false}
}'
) as data
)
select
data -> 'cities' -> 1 as city,
data -> 'skills' -> 'SQL' as sql_skills
from
json_table
select
cast(zipcode as numeric)
from
nyc_zips
limit
3
select
zipcode :: numeric
from
nyc_zips
limit
3
select
*
from
nyc_2015_tree_census
where
health = 'Fair'
select
*
from
nyc_2015_tree_census
where
stump_diam > 0
select
*
from
nyc_2015_tree_census
where
stump_diam :: numeric > 0
select
spc_common
from
nyc_2015_tree_census
where
spc_common > 'Maple'
select
*
from
public.nyc_311
where
complaint_type = 'Illegal Fireworks'
and city = 'BROOKLYN'
limit
25
select
*
from
public.nyc_311
where
complaint_type = 'Illegal Fireworks'
or agency = 'NYPD'
limit
25
select
*
from
nyc_311
where
complaint_type IN ('Illegal Fireworks', 'Noise - Residential')
limit
25
select
*
from
nyc_311
where
complaint_type IN ('Illegal Fireworks', 'Noise - Residential')
and descriptor IN ('Loud Music/Party', 'Banging/Pounding')
limit
25
select
*
from
nyc_311
where
complaint_type IN ('Illegal Fireworks', 'Noise - Residential')
and descriptor NOT IN ('Loud Music/Party', 'Banging/Pounding')
limit
25
select
*
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:05:00'
-- All return true since it matches the exact word
select
'maple' like '%maple', --true
'maple' like 'maple%', --true
'maple' like '%maple%' --true
-- The first returns false since the phrase does not end in the word "maple"
select
'maple syrup' like '%maple', --false
'maple syrup' like 'maple%', --true
'maple syrup' like '%maple%' --true
-- The second returns false since the phrase does not begin with the word "maple"
select
'red maple' like '%maple', --true
'red maple' like 'maple%', --false
'red maple' like '%maple%' --true
select
spc_common
from
public.nyc_2015_tree_census
where
spc_common like '%maple%
SELECT
'maple' like 'm___', --false
'maple' like 'm____', --true
'maple' like 'm_____' --false
select
*
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_longitude IS NULL
select
distinct spc_common
from
nyc_2015_tree_census
where
spc_common like '%maple%'
limit
5
select
nta_name,
count(ogc_fid)
from
nyc_2015_tree_census
where
spc_common like '%maple%'
group by
nta_name
limit
5
select
nta_name,
problems,
count(ogc_fid)
from
nyc_2015_tree_census
where
spc_common like '%maple%'
group by
nta_name,
problems
limit
5
select
nta_name,
array_agg(distinct curb_loc),
count(ogc_fid)
from
nyc_2015_tree_census
where
spc_common like '%maple%'
group by
nta_name
limit
3
select
nta_name,
avg(stump_diam :: numeric)
from
nyc_2015_tree_census
where
stump_diam :: numeric > 0
group by
nta_name
limit
3
select
passenger_count,
avg(tip_amount) filter (
where
tip_amount > 5
)
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:05:00'
group by
passenger_count
select
passenger_count,
avg(tip_amount) filter (
where
tip_amount > 5
),
count(ogc_fid)
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:05:00'
and count(ogc_fid) > 50
group by
passenger_count
select
passenger_count,
avg(tip_amount) filter (
where
tip_amount > 5
),
count(ogc_fid)
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:05:00'
group by
passenger_count
having
count(ogc_fid) > 50
select
passenger_count,
tip_amount
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:05:00'
order by
tip_amount desc
limit
5
select
nta_name,
count(ogc_fid)
from
nyc_2015_tree_census
where
spc_common like '%maple%'
group by
nta_name
order by
count(ogc_fid) desc
limit
5
select
nta_name,
count(ogc_fid)
from
nyc_2015_tree_census
where
spc_common like '%maple%'
group by
nta_name
order by
count(ogc_fid) desc
limit
5 offset 5
case
when temp > 35 then 'Super Hot !'
when temp between 30
and 35 then 'Hot'
when temp between 25
and 30 then 'Pretty Warm'
when temp between 20
and 25 then 'Warm'
when temp between 15
and 20 then 'Cool / Warm'
when temp between 10
and 15 then 'Cool'
when temp between 5
and 10 then 'Chilly'
when temp between 0
and 5 then 'Cold'
when temp between -5
and 0 then 'Pretty Cold'
when temp between -10
and -5 then 'Very Cold'
when temp between -15
and -10 then 'Brrrrr'
when temp > -15 then 'Frigid !'
else null
end
select
spc_common,
case
when spc_common like '%maple%' then 'Maple'
else 'Not a Maple'
end as is_maple
from
nyc_2015_tree_census
limit
10
select
fare_amount,
tip_amount,
case
when tip_amount / fare_amount between.15
and.2 then 'Good'
when tip_amount / fare_amount between.2
and.25 then 'Great'
when tip_amount / fare_amount between.25
and.3 then 'Amazing'
when tip_amount / fare_amount >.3 then 'Awesome'
else 'Not Great'
end as tip_class
from
nyc_yellow_taxi_0601_0615_2016
limit
10
select
nta_name,
sum(
case
when spc_common like '%maple%' then 1
else 0
end
) just_maples,
count(ogc_fid) as all_trees
from
nyc_2015_tree_census
group by
nta_name
limit
5
case
when temp > 35 then 'Super Hot !'
when temp between 30
and 35 then 'Hot'
when temp between 25
and 30 then 'Pretty Warm'
when temp between 20
and 25 then 'Warm'
when temp between 15
and 20 then 'Cool / Warm'
when temp between 10
and 15 then 'Cool'
when temp between 5
and 10 then 'Chilly'
when temp between 0
and 5 then 'Cold'
when temp between -5
and 0 then 'Pretty Cold'
when temp between -10
and -5 then 'Very Cold'
when temp between -15
and -10 then 'Brrrrr'
when temp > -15 then 'Frigid !'
else null
end
select
spc_common,
case
when spc_common like '%maple%' then 'Maple'
else 'Not a Maple'
end as is_maple
from
nyc_2015_tree_census
limit
10
select
fare_amount,
tip_amount,
case
when tip_amount / fare_amount between.15
and.2 then 'Good'
when tip_amount / fare_amount between.2
and.25 then 'Great'
when tip_amount / fare_amount between.25
and.3 then 'Amazing'
when tip_amount / fare_amount >.3 then 'Awesome'
else 'Not Great'
end as tip_class
from
nyc_yellow_taxi_0601_0615_2016
limit
10
select
nta_name,
sum(
case
when spc_common like '%maple%' then 1
else 0
end
) just_maples,
count(ogc_fid) as all_trees
from
nyc_2015_tree_census
group by
nta_name
limit
5
select
zipcode,
count(ogc_fid)
from
nyc_2015_tree_census
where
spc_common like '%maple%'
and zipcode in (
select
zipcode
from
nyc_zips
where
population > 100000
)
group by
zipcode
select
zipcode,
count(ogc_fid)
from
nyc_2015_tree_census
where
spc_common like '%maple%'
and zipcode in ('11226', '11368', '11373')
group by
zipcode
select
zipcode,
count(ogc_fid)
from
nyc_2015_tree_census
where
zipcode = (
select
zipcode
from
nyc_zips
order by
population asc
limit
1
)
group by
zipcode
with lanes as (
select
ogc_fid,
lanecount
from
nyc_bike_routes
order by
lanecount desc
limit
3
)
select
*
from
lanes
with lanes as (
select
ogc_fid,
lanecount
from
nyc_bike_routes
order by
lanecount desc
limit
3
), lanes_2 as (
select
ogc_fid,
lanecount
from
nyc_bike_routes
order by
lanecount desc
limit
3 offset 12
)
select
*
from
lanes
-- the UNION operator allows us to bring
-- two tables with matching columns together
union
select
*
from
lanes_2
create table test (city text, country text, size_rank numeric)
insert into
test (city, country, size_rank)
values
('Tokyo', 'Japan', 1),
('Delhi', 'India', 2),
('Shanghai', 'China', 3),
('São Paulo', 'Brazil', 4),
('Mexico City', 'Mexico', 5)
alter table
test
add
column population numeric
alter table
test rename column population to city_pop
alter table
test
alter column
city_pop type int
update
test
set
city = 'Ciudad de México'
where
city = 'Mexico City'
alter table
test rename to world_cities
alter table
world_cities drop column city_pop
delete from
world_cities
where
city = 'Tokyo'
drop table world_cities
select
corr(assesstot :: numeric, lotarea :: numeric)
from
nyc_mappluto
select
stddev_samp(lotarea :: numeric)
from
nyc_mappluto
where
borough = 'BK'
select
var_samp(lotarea :: numeric)
from
nyc_mappluto
where
borough = 'BK'
select
mode() within group (
order by
lotarea :: numeric desc
)
from
nyc_mappluto
where
borough = 'BK'
select
ogc_fid,
tip_amount,
percent_rank() over(
order by
tip_amount asc
)
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:00:05'
and tip_amount > 0
select
ogc_fid,
tip_amount,
rank() over(
order by
tip_amount asc
)
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:00:05'
and tip_amount > 0
select
ogc_fid,
tip_amount,
dense_rank() over(
order by
tip_amount asc
)
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:00:05'
and tip_amount > 0
select
percentile_disc(0.25) within group (
order by
tip_amount
) as per_25,
percentile_disc(0.5) within group (
order by
tip_amount
) as per_50,
percentile_disc(0.75) within group (
order by
tip_amount
) as per_75
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime between '2016-06-10 15:00:00'
and '2016-06-10 15:00:05'
and tip_amount > 0
with taxis as (
select
sum(total_amount) as total,
pickup_datetime :: date as date
from
nyc_yellow_taxi_0601_0615_2016
group by
pickup_datetime :: date
order by
pickup_datetime :: date asc
)
select
date,
total,
avg(total) over (
order by
date rows between 2 preceding
and current row
)
from
taxis
with taxis as (
select
sum(total_amount) as total,
passenger_count,
pickup_datetime :: date as date
from
nyc_yellow_taxi_0601_0615_2016
group by
pickup_datetime :: date,
passenger_count
order by
pickup_datetime :: date asc,
passenger_count desc
)
select
date,
total,
passenger_count,
sum(total) over (
partition by passenger_count
order by
date rows between 2 preceding
and current row
)
from
taxis
select
avg(tip_amount / total_amount),
extract(
hour
from
pickup_datetime
) as hour_of_day
from
nyc_yellow_taxi_0601_0615_2016
where
pickup_datetime :: date = '2016-06-15'
-- since we can't divide by 0 we will remove all amounts
-- that equal 0
and total_amount > 0
group by
extract(
hour
from
pickup_datetime
)
order by
extract(
hour
from
pickup_datetime
) asc
with taxis as (
select
avg(tip_amount / total_amount) as tip_percentage,
date_trunc('hour', pickup_datetime) as day_hour
from
nyc_yellow_taxi_0601_0615_2016
where
total_amount > 5
group by
date_trunc('hour', pickup_datetime)
)
select
day_hour,
tip_percentage,
avg(tip_percentage) over (
order by
day_hour asc rows between 5 preceding
and current row
) as moving_average
from
taxis
with taxis as (
select
sum(total_amount) as total,
passenger_count,
pickup_datetime :: date as date
from
nyc_yellow_taxi_0601_0615_2016
group by
pickup_datetime :: date,
passenger_count
order by
pickup_datetime :: date asc,
passenger_count desc
)
select
date,
total,
passenger_count,
sum(total) over (
partition by passenger_count
order by
date
)
from
taxis
select
row_number() over() as row_no,
ogc_fid
from
nyc_yellow_taxi_0601_0615_2016
limit
5
select
row_number() over(partition by pickup_datetime) as row_no,
ogc_fid,
pickup_datetime
from
nyc_yellow_taxi_0601_0615_2016
limit
5 offset 100000 -- using offset since the first part of the datasets has all passenger counts as 0
with taxis as (
select
sum(total_amount) as total,
passenger_count,
pickup_datetime :: date as date
from
nyc_yellow_taxi_0601_0615_2016
group by
pickup_datetime :: date,
passenger_count
order by
pickup_datetime :: date asc,
passenger_count desc
)
select
date,
total,
passenger_count,
total - lag(total, 1) over (
partition by passenger_count
order by
date
)
from
taxis
select
nyc_311.complaint_type,
nyc_311.incident_zip,
nyc_zips.population
from
nyc_311
join nyc_zips on nyc_311.incident_zip = nyc_zips.zipcode
limit
5
select
nyc_311.complaint_type,
nyc_311.incident_zip,
nyc_zips.population
from
nyc_311
join nyc_zips on nyc_311.incident_zip = nyc_zips.zipcode
where
nyc_zips.population > 80000
limit
5
select
a.complaint_type,
a.incident_zip,
b.population
from
nyc_311 a
join nyc_zips b on a.incident_zip = b.zipcode
where
b.population > 80000
limit
5
with b as (select population, zipcode from nyc_zips limit 30)
select
a.complaint_type,
a.incident_zip,
b.population
from
nyc_311 a
left join b on a.incident_zip = b.zipcode
limit
5
with a as (
select
complaint_type,
incident_zip
from
nyc_311
limit
30
)
select
a.complaint_type,
a.incident_zip,
b.population
from
a
right join nyc_zips b on a.incident_zip = b.zipcode
limit
5
with a as (
select
complaint_type,
incident_zip
from
nyc_311
limit
30
), b as (
select
population,
zipcode
from
nyc_zips
limit
30
)
select
a.complaint_type,
a.incident_zip,
b.population
from
a full
outer join b on a.incident_zip = b.zipcode
limit
100
with a as (
select
neighborhood
from
nyc_neighborhoods
limit
2
), b as (
select
population,
zipcode
from
nyc_zips
limit
2
)
select
a.neighborhood,
b.population
from
a
cross join b
with a as (
select
neighborhood
from
nyc_neighborhoods
limit
2
), b as (
select
population,
zipcode
from
nyc_zips
limit
2
)
select
a.neighborhood,
b.population
from
a,
b
with a as (
select
neighborhood
from
nyc_neighborhoods
limit
2
), b as (
select
population,
zipcode
from
nyc_zips
limit
2
)
select
a.neighborhood,
b.population,
b.population / 1000 as calculation
from
a,
b
with a as (
select
ogc_fid,
zipcode :: text
from
nyc_mappluto
)
select
count(a.ogc_fid),
b.zipcode
from
nyc_zips b
join a using(zipcode)
group by
b.zipcode
order by
count(a.ogc_fid) desc
with a as (
select
ogc_fid,
zipcode :: text
from
nyc_mappluto
order by
ogc_fid desc
limit
5000
), c as (
select
ogc_fid,
zipcode
from
nyc_2015_tree_census
order by
ogc_fid desc
limit
5000
)
select
count(a.ogc_fid) as buildings,
-- count(c.ogc_fid) as trees,
b.zipcode
from
nyc_zips b
join a using(zipcode)
-- join c
-- using(zipcode)
group by
b.zipcode
order by
count(a.ogc_fid) desc
with a as (
select
ogc_fid,
zipcode :: text
from
nyc_mappluto
order by
ogc_fid desc
limit
5000
), c as (
select
ogc_fid,
zipcode
from
nyc_2015_tree_census
order by
ogc_fid desc
limit
5000
)
select
-- count(a.ogc_fid) as buildings,
count(c.ogc_fid) as trees,
b.zipcode
from
nyc_zips b
-- join a using(zipcode)
join c using(zipcode)
group by
b.zipcode
-- order by count(a.ogc_fid) desc
order by
count(c.ogc_fid) desc
with a as (
select
count(ogc_fid) as buildings,
zipcode :: text
from
nyc_mappluto
group by
zipcode
),
b as (
select
count(ogc_fid) as trees,
zipcode
from
nyc_2015_tree_census
group by
zipcode
)
select
a.buildings,
b.trees,
c.zipcode
from
nyc_zips c
join a using(zipcode)
join b using(zipcode)
order by
b.trees desc
select
a.neighborhood,
trees.trees_per_sq_meter
from
nyc_neighborhoods a
cross join lateral (
select
count(ogc_fid) / a.area :: numeric as trees_per_sq_meter
from
nyc_2015_tree_census
where
a.neighborhood = neighborhood
) trees
order by
trees.trees_per_sq_meter desc
create table cities (
city text,
country text,
size_rank numeric,
time_zone text,
time_zone_abbrev text
)
create
or replace function set_timezones() returns trigger language plpgsql as $$ begin
update
cities
set
time_zone = a.name,
time_zone_abbrev = a.abbrev
from
pg_timezone_names a
where
a.name like '%' || replace(city, ' ', ' _ ') || '%';
return new;
end;
$$
create trigger update_city_tz
after
insert
on cities for each row execute procedure set_timezones();
insert into
cities (city, country, size_rank)
values
('Tokyo', 'Japan', 1),
('Delhi', 'India', 2),
('Shanghai', 'China', 3),
('São Paulo', 'Brazil', 4),
('Mexico City', 'Mexico', 5)
create
or replace function set_timezones() returns trigger language plpgsql as $$ begin
update
cities
set
time_zone = data.name,
time_zone_abbrev = data.abbrev
from
(
select
name,
abbrev
from
pg_timezone_names
where
name like '%' || replace(city, ' ', '_') || '%'
) as data;
return new;
end;
$$
create
or replace function tip_percentage(tip_column float, total_column float)
returns numeric
language plpgsql
as $$
declare tip_percentage numeric;
begin if total_column = 0 then tip_percentage = 0;
elsif total_column is null then tip_percentage = 0;
elsif total_column > 0 then tip_percentage = tip_column / total_column;
end if;
return tip_percentage;
end;
$$
select
total_amount,
tip_amount,
tip_percentage(tip_amount, total_amount)
from
nyc_yellow_taxi_0601_0615_2016
order by
pickup_datetime desc
limit
10 offset 10000
select
*
from
find_311_text_match('food')
limit
5
select
geom
from
nyc_building_footprints
limit
1
select
st_astext(geom) as wkt
from
nyc_building_footprints
limit
5
insert into
geometries
values
('point', st_geomfromtext('POINT(0 0)')),
(
'line',
st_geomfromtext('LINESTRING(0 0,1 1,1 2)')
),
(
'polygon',
st_geomfromtext(
'POLYGON((0 0,4 0,4 4,0 4,0 0),(1 1, 2 1, 2 2, 1 2,1 1))'
)
)
insert into
geometries
values
(
'multipoint',
st_geomfromtext('MULTIPOINT((0 0),(1 2))')
),
(
'multiline',
st_geomfromtext('MULTILINESTRING((0 0,1 1,1 2),(2 3,3 2,5 4))')
),
(
'multipolygon',
st_geomfromtext(
'MULTIPOLYGON(((0 0,4 0,4 4,0 4,0 0),(1 1,2 1,2 2,1 2,1 1)), ((-1 -1,-1 -2,-2 -2,-2 -1,-1 -1)))'
)
),
(
'geometry collection',
st_geomfromtext(
'GEOMETRYCOLLECTION(POINT(2 3),LINESTRING(2 3,3 4))'
)
)
select
st_curvetoline(
st_geomfromtext('CIRCULARSTRING(0 0, 4 0, 4 4, 0 4, 0 0)')
) as geom
select
geom
from
nyc_building_footprints
limit
1
select
st_astext(geom) as wkt
from
nyc_building_footprints
limit
5
insert into
geometries
values
('point', st_geomfromtext('POINT(0 0)')),
(
'line',
st_geomfromtext('LINESTRING(0 0,1 1,1 2)')
),
(
'polygon',
st_geomfromtext(
'POLYGON((0 0,4 0,4 4,0 4,0 0),(1 1, 2 1, 2 2, 1 2,1 1))'
)
)
insert into
geometries
values
(
'multipoint',
st_geomfromtext('MULTIPOINT((0 0),(1 2))')
),
(
'multiline',
st_geomfromtext('MULTILINESTRING((0 0,1 1,1 2),(2 3,3 2,5 4))')
),
(
'multipolygon',
st_geomfromtext(
'MULTIPOLYGON(((0 0,4 0,4 4,0 4,0 0),(1 1,2 1,2 2,1 2,1 1)), ((-1 -1,-1 -2,-2 -2,-2 -1,-1 -1)))'
)
),
(
'geometry collection',
st_geomfromtext(
'GEOMETRYCOLLECTION(POINT(2 3),LINESTRING(2 3,3 4))'
)
)
select
st_curvetoline(
st_geomfromtext('CIRCULARSTRING(0 0, 4 0, 4 4, 0 4, 0 0)')
) as geom
select
st_memsize(st_geomfromtext('POINT(0 0)')) as geom
select
st_memsize(st_geomfromtext('LINESTRING(0 0, 0 1)')) as geom
select
st_memsize(
st_geomfromtext('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
) as geom
select
st_memsize(geom)
from
nyc_neighborhoods
where
neighborhood = 'College Point'
select
st_npoints(geom) as points,
st_geometrytype(geom) as type,
st_numgeometries(geom) as geometries
from
nyc_neighborhoods
where
neighborhood = 'College Point'
select
st_memsize(
st_geomfromtext('MULTIPOLYGON(((0 0, 1 0, 1 1, 0 1, 0 0)))')
) as geom
select
st_memsize(
st_geomfromtext(
'MULTIPOLYGON(((0 0, 1 0, 1 1, 0 1, 0 0)), ((1 1, 2 1, 2 2, 1 2, 1 1)))'
)
) as geom
select
st_length(
st_geomfromtext(
'LINESTRING(743238 2967416,743238 2967450,743265 2967450,
743265.625 2967416,743238 2967416)',
2249
)
);
-- Transforming WGS 84 LINESTRING to Massachusetts State Plane Meters
select
st_length(
st_transform(
st_geomfromewkt(
'srid=4326;linestring(-72.1260 42.45, -72.1240 42.45666, -72.123 42.1546)'
),
26986
)
);
with a as (
select
geom
from
nyc_zips
limit
5
)
select
st_collect(geom)
from
a
with a as (
select
geom,
population,
zipcode
from
nyc_zips
limit
5
)
select
string_agg(zipcode, ',') as zips,
sum(population) as total_pop,
st_collect(geom)
from
a
select
st_makeenvelope(-66.125091, 18.296531, -65.99142, 18.471986) as geom
select
st_makeenvelope(
-66.125091,
18.296531,
-65.99142,
18.471986,
4326
) as geom
select
st_makepoint(pickup_longitude, pickup_latitude) as geom
from
nyc_yellow_taxi_0601_0615_2016
limit
100
select
ogc_fid,
st_setsrid(
st_makepoint(pickup_longitude, pickup_latitude),
4326
) as geom
from
nyc_yellow_taxi_0601_0615_2016
limit
100
create or replace function BuildPoint(x numeric, y numeric, srid int)
returns geometry
language plpgsql
as $$
begin
return st_setsrid(st_makepoint(x, y), srid);
end;
$$;
select
ogc_fid,
buildpoint(
pickup_longitude :: numeric,
pickup_latitude :: numeric,
4326
) as geom
from
nyc_yellow_taxi_0601_0615_2016
limit
100
SELECT
st_setsrid(
ST_Translate(ST_Scale(ST_Letters('Spatial SQL'), 1, 1), 0, 0),
4326
);
select
st_dump(geom) as geom
from
nyc_neighborhoods
where
neighborhood = 'City Island'
select
(st_dump(geom)).geom as geom
from
nyc_neighborhoods
where
neighborhood = 'City Island'
select
st_geometrytype(geom)
from
nyc_neighborhoods
where
neighborhood = 'City Island'
select
st_memsize(geom)
from
nyc_neighborhoods
where
neighborhood = 'City Island'
select
st_npoints(geom)
from
nyc_neighborhoods
where
neighborhood = 'City Island'
select
st_pointn(geom, 1) as geom
from
nyc_bike_routes
where
segmentid = '331385'
select
st_geometrytype(geom) as geom
from
nyc_bike_routes
where
segmentid = '331385'
select
st_pointn(st_linemerge(geom), 1) as geom
from
nyc_bike_routes
where
segmentid = '331385'
select
*
from
nyc_building_footprints
where
st_isvalid(geom) is false
select
mpluto_bbl,
st_isvaliddetail(geom)
from
nyc_building_footprints
where
mpluto_bbl in ('1022430261', '1016710039', '4039760001')
select
mpluto_bbl,
st_isvalidreason(geom)
from
nyc_building_footprints
where
mpluto_bbl in ('1022430261', '1016710039', '4039760001')
select
mpluto_bbl,
st_isvalid(st_makevalid(geom))
from
nyc_building_footprints
where
mpluto_bbl in ('1022430261', '1016710039', '4039760001')
select
st_srid(geom)
from
nyc_building_footprints
limit
3
select
ogc_fid,
st_transform(geom, 2263) as geom
from
nyc_building_footprints
limit
3
select
st_geomfromtext('POINT(-73.9772294 40.7527262)', 4326) as geom
select
st_asgeojson(geom)
from
nyc_building_footprints
limit
3
select
st_geomfromtext('POINT(-73.9772294 40.7527262)', 4326) as geom
select
st_buffer(st_transform(geom, 4326) :: geography, -200)
from
nyc_zips
limit
10
select
st_centroid(st_transform(geom, 4326) :: geography)
from
nyc_zips
limit
10
select
st_transform(geom, 4326) as original,
st_chaikinsmoothing(st_transform(geom, 4326), 1) as one,
st_chaikinsmoothing(st_transform(geom, 4326), 5) as five
from
nyc_zips
limit
10
-- Find the first 50 trees in Fort Greene with the
-- Latitude and longitude columns
with a as (
select
latitude,
longitude
from
nyc_2015_tree_census
where
nta_name = 'Fort Greene'
limit
50
),
-- Turn the latitude/longitude columns into geometries
b as (
select
st_collect(
buildpoint(longitude :: numeric, latitude :: numeric, 4326)
) as geom
from
a
)
-- Create multiple concave hulls with various concave-ness
-- and use UNION to turn them into a single table
select
st_concavehull(geom, 0.1) as hull
from
b
union
select
st_concavehull(geom, 0.3) as hull
from
b
union
select
st_concavehull(geom, 0.7) as hull
from
b
with a as (
select
latitude,
longitude
from
nyc_2015_tree_census
where
nta_name = 'Fort Greene'
limit
50
), b as (
select
st_collect(
buildpoint(longitude :: numeric, latitude :: numeric, 4326)
) as geom
from
a
)
select
st_convexhull(geom) as hull
from
b
select
st_delaunaytriangles(st_transform(geom, 4326)) as triangles
from
nyc_zips
where
zipcode = '10009'
select
st_generatepoints(st_transform(geom, 4326), 500) as points
from
nyc_zips
where
zipcode = '10009'
select
st_linemerge(geom) as geom from
nyc_bike_routes
where
street = '7 AV'
and fromstreet = '42 ST'
and tostreet = '65 ST'
-- Create multiple geometries with different simplification levels
-- and UNION them into one table
select
st_transform(geom, 4326) as geom
from
nyc_zips
where
zipcode = '11693'
union
select
st_transform(st_simplify(geom, 1), 4326) as geom
from
nyc_zips
where
zipcode = '11693'
union
select
st_transform(st_simplify(geom, 10), 4326) as geom
from
nyc_zips
where
zipcode = '11693'
union
select
st_transform(st_simplify(geom, 50), 4326) as geom
from
nyc_zips
where
zipcode = '11693'
-- Create multiple geometries that share borders with different
-- simplification levels and UNION them into one table
select
st_transform(geom, 4326) as geom
from
nyc_zips
where
zipcode = '11434'
or st_touches(
geom,
(
select
geom
from
nyc_zips
where
zipcode = '11434'
)
)
union
select
st_transform(st_simplifypreservetopology(geom, 1), 4326) as geom
from
nyc_zips
where
zipcode = '11434'
or st_touches(
geom,
(
select
geom
from
nyc_zips
where
zipcode = '11434'
)
)
union
select
st_transform(st_simplifypreservetopology(geom, 10), 4326) as geom
from
nyc_zips
where
zipcode = '11434'
or st_touches(
geom,
(
select
geom
from
nyc_zips
where
zipcode = '11434'
)
)
union
select
st_transform(st_simplifypreservetopology(geom, 50), 4326) as geom
from
nyc_zips
where
zipcode = '11434'
or st_touches(
geom,
(
select
geom
from
nyc_zips
where
zipcode = '11434'
)
)
select
st_area(geom) as area
from
nyc_building_footprints
limit
5
select
st_area(geom :: geography) as area
from
nyc_building_footprints
limit
5
with one as (
select
geom
from
nyc_zips
where
zipcode = '10009'
),
two as (
select
geom
from
nyc_zips
where
zipcode = '10001'
)
select
st_closestpoint(one.geom, two.geom) as point
from
one,
two
with one as (
select
geom
from
nyc_zips
where
zipcode = '10009'
),
two as (
select
geom
from
nyc_zips
where
zipcode = '10001'
)
select
st_distance(one.geom, two.geom) as dist
from
one,
two
with one as (
select
geom
from
nyc_zips
where
zipcode = '10009'
),
two as (
select
geom
from
nyc_zips
where
zipcode = '10001'
)
select
st_distance(one.geom, two.geom) / 5280 as dist
from
one,
two
with one as (
select
geom :: geography as geog
from
nyc_zips
where
zipcode = '10009'
),
two as (
select
geom :: geography as geog
from
nyc_zips
where
zipcode = '10001'
)
select
st_distance(one.geog, two.geog) / 1609 as dist
from
one,
two
select
st_srid(geom)
from
nyc_zips
limit
1
with one as (
select
geom
from
nyc_zips
where
zipcode = '10009'
),
two as (
select
geom
from
nyc_zips
where
zipcode = '10001'
)
select
st_transform(st_shortestline(one.geom, two.geom), 4326) as line
from
one,
two
select
st_length(geom :: geography)
from
nyc_bike_routes
limit
1
select
st_perimeter(geom)
from
nyc_zips
where
zipcode = '10009'
with one as (
select
geom
from
nyc_zips
where
zipcode = '10009'
),
two as (
select
geom
from
nyc_zips
where
zipcode = '10001'
)
select
st_transform(st_shortestline(one.geom, two.geom), 4326) as line
from
one,
two
select
ogc_fid,
trip_distance,
total_amount,
-- ST_Intersects will return a boolean in a column
st_intersects(
-- First geometry is at the pickup location using the buildpoint function
buildpoint(
pickup_longitude :: numeric,
pickup_latitude :: numeric,
4326
),
-- This selects the geometry for the West Village
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'West Village'
)
)
from
nyc_yellow_taxi_0601_0615_2016
order by
pickup_datetime asc
limit
10
select
ogc_fid,
trip_distance,
total_amount
from
nyc_yellow_taxi_0601_0615_2016
where
-- Using ST_Intersects in the WHERE clause
st_intersects(
-- Using ST_Intersects in the WHERE clause, first with the pick up point
buildpoint(
pickup_longitude :: numeric,
pickup_latitude :: numeric,
4326
),
-- Selecting the geometry for the West Village
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'West Village'
)
)
order by
pickup_datetime asc
limit
10
alter table
nyc_yellow_taxi_0601_0615_2016
add
column pickup geometry,
add
column dropoff geometry
update
nyc_yellow_taxi_0601_0615_2016
set
pickup = st_setsrid(
st_makepoint(pickup_longitude, pickup_latitude),
4326
),
dropoff = st_setsrid(
st_makepoint(pickup_longitude, pickup_latitude),
4326
)
select
ogc_fid,
trip_distance,
total_amount
from
nyc_yellow_taxi_0601_0615_2016
where
st_intersects(
pickup,
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'West Village'
)
)
order by
pickup_datetime asc
limit
10
select
a.ogc_fid,
a.trip_distance,
a.total_amount,
st_intersects(a.pickup, b.geom)
from
nyc_yellow_taxi_0601_0615_2016 a,
nyc_neighborhoods b
where
b.neighborhood = 'West Village'
order by
a.pickup_datetime asc
limit
100
select
a.ogc_fid,
a.trip_distance,
a.total_amount
from
nyc_yellow_taxi_0601_0615_2016 a
-- Since ST_Intersects will return true or false
-- we can use it to evaluate the join
join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
b.neighborhood = 'West Village'
order by
a.pickup_datetime asc
limit
10
select
a.ogc_fid,
a.trip_distance,
a.total_amount,
b.neighborhood
from
nyc_yellow_taxi_0601_0615_2016 a
join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
b.neighborhood = 'West Village'
order by
a.pickup_datetime asc
limit
10
select
-- Here we can aggregate other data that all falls within our
-- joined geometry data
a.neighborhood,
sum(b.total_amount) as sum_amount,
avg(b.total_amount) as avg_amount
from
nyc_neighborhoods a
join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
where
a.neighborhood = 'West Village'
group by
a.neighborhood
limit
5
with a as (
select
a.neighborhood,
sum(b.total_amount) as sum_amount
from
nyc_neighborhoods a
join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
where
a.neighborhood = 'West Village'
group by
a.neighborhood
)
select
a.sum_amount,
b.*
from
a
join nyc_neighborhoods b using (neighborhood)
select
zipcode,
population,
z.neighbor_sum
from
nyc_zips a
-- This join will join across to each row of data above
-- but can also reference data from the outer query
cross join lateral (
select
sum(population) as neighbor_sum
from
nyc_zips
where
st_intersects(geom, a.geom)
and a.zipcode != zipcode
) z
select
ogc_fid,
trip_distance,
total_amount,
-- ST_Intersects will return a boolean in a column
st_intersects(
-- First geometry is at the pickup location using the buildpoint function
buildpoint(
pickup_longitude :: numeric,
pickup_latitude :: numeric,
4326
),
-- This selects the geometry for the West Village
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'West Village'
)
)
from
nyc_yellow_taxi_0601_0615_2016
order by
pickup_datetime asc
limit
10
select
ogc_fid,
trip_distance,
total_amount
from
nyc_yellow_taxi_0601_0615_2016
where
-- Using ST_Intersects in the WHERE clause
st_intersects(
-- Using ST_Intersects in the WHERE clause, first with the pick up point
buildpoint(
pickup_longitude :: numeric,
pickup_latitude :: numeric,
4326
),
-- Selecting the geometry for the West Village
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'West Village'
)
)
order by
pickup_datetime asc
limit
10
alter table
nyc_yellow_taxi_0601_0615_2016
add
column pickup geometry,
add
column dropoff geometry
update
nyc_yellow_taxi_0601_0615_2016
set
pickup = st_setsrid(
st_makepoint(pickup_longitude, pickup_latitude),
4326
),
dropoff = st_setsrid(
st_makepoint(pickup_longitude, pickup_latitude),
4326
)
select
ogc_fid,
trip_distance,
total_amount
from
nyc_yellow_taxi_0601_0615_2016
where
st_intersects(
pickup,
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'West Village'
)
)
order by
pickup_datetime asc
limit
10
select
a.ogc_fid,
a.trip_distance,
a.total_amount,
st_intersects(a.pickup, b.geom)
from
nyc_yellow_taxi_0601_0615_2016 a,
nyc_neighborhoods b
where
b.neighborhood = 'West Village'
order by
a.pickup_datetime asc
limit
100
select
a.ogc_fid,
a.trip_distance,
a.total_amount
from
nyc_yellow_taxi_0601_0615_2016 a
-- Since ST_Intersects will return true or false
-- we can use it to evaluate the join
join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
b.neighborhood = 'West Village'
order by
a.pickup_datetime asc
limit
10
select
a.ogc_fid,
a.trip_distance,
a.total_amount,
b.neighborhood
from
nyc_yellow_taxi_0601_0615_2016 a
join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
b.neighborhood = 'West Village'
order by
a.pickup_datetime asc
limit
10
select
-- Here we can aggregate other data that all falls within our
-- joined geometry data
a.neighborhood,
sum(b.total_amount) as sum_amount,
avg(b.total_amount) as avg_amount
from
nyc_neighborhoods a
join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
where
a.neighborhood = 'West Village'
group by
a.neighborhood
limit
5
with a as (
select
a.neighborhood,
sum(b.total_amount) as sum_amount
from
nyc_neighborhoods a
join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
where
a.neighborhood = 'West Village'
group by
a.neighborhood
)
select
a.sum_amount,
b.*
from
a
join nyc_neighborhoods b using (neighborhood)
select
zipcode,
population,
z.neighbor_sum
from
nyc_zips a
-- This join will join across to each row of data above
-- but can also reference data from the outer query
cross join lateral (
select
sum(population) as neighbor_sum
from
nyc_zips
where
st_intersects(geom, a.geom)
and a.zipcode != zipcode
) z
select
name,
geom
from
nyc_building_footprints
where
st_contains(
(
select
st_buffer(
buildpoint(-73.993584, 40.750580, 4326) :: geography,
200
) :: geometry
),
geom
)
select
a.ogc_fid,
a.name,
st_centroid(st_transform(a.geom, 4326)),
b.ogc_fid as b_id,
st_transform(b.geom, 4326)
from
nyc_building_footprints a
join nyc_bike_routes b on st_crosses(a.geom, b.geom)
select
name,
geom
from
nyc_building_footprints
where
st_disjoint(
(
select
st_buffer(
buildpoint(-74.002222, 40.733889, 4326) :: geography,
200
) :: geometry
),
geom
)
order by
st_distance(
(
select
st_buffer(
buildpoint(-74.002222, 40.733889, 4326) :: geography,
200
) :: geometry
),
geom
) asc
limit
200
select
st_geomfromtext(
'GEOMETRYCOLLECTION(LINESTRING(0 0, 3 3), POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)))'
) as geom,
st_overlaps(
st_geomfromtext('LINESTRING(0 0, 0 3)'),
st_geomfromtext('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
)
select
st_geomfromtext(
'GEOMETRYCOLLECTION(POLYGON((1 0, 1 1, 1 2, 0 2, 1 0)), POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)))'
) as geom,
st_overlaps(
st_geomfromtext('POLYGON((1 0, 1 1, 1 2, 0 2, 1 0))'),
st_geomfromtext('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
)
select
st_geomfromtext(
'GEOMETRYCOLLECTION(LINESTRING(1 0, 1 1, 0 1, 0 0), LINESTRING(2 1, 1 1, 0 1, 1 2))'
) as geom,
st_overlaps(
st_geomfromtext('LINESTRING(1 0, 1 1, 0 1, 0 0)'),
st_geomfromtext('LINESTRING(2 1, 1 1, 0 1, 1 2)')
)
select
*
from
nyc_zips
where
st_touches(
geom,
(
select
geom
from
nyc_zips
where
zipcode = '10009'
)
)
select
name,
geom
from
nyc_building_footprints
where
st_within(
geom,
(
select
st_buffer(
buildpoint(-74.002222, 40.733889, 4326) :: geography,
200
) :: geometry
)
)
select
name,
geom
from
nyc_building_footprints
where
st_intersects(
geom,
(
select
st_buffer(
buildpoint(-74.002222, 40.733889, 4326) :: geography,
200
) :: geometry
)
)
select
name,
geom
from
nyc_building_footprints
where
st_intersects(
geom,
(
select
st_buffer(
buildpoint(-74.002222, 40.733889, 4326) :: geography,
10000
) :: geometry
)
)
select
name,
geom
from
nyc_building_footprints
where
st_dwithin(
st_transform(geom, 3857),
buildpoint(-74.002222, 40.733889, 3857),
200
)
select
name,
geom
from
nyc_building_footprints
where
st_dwithin(
st_transform(geom, 3857),
st_transform(
st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
3857
),
200
)
alter table
nyc_building_footprints
add
column geom_3857 geometry;
update
nyc_building_footprints
set
geom_3857 = st_transform(geom, 3857);
select
name,
geom
from
nyc_building_footprints
where
st_dwithin(
geom_3857,
st_transform(
st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
3857
),
200
)
with a as (
select
st_transform(
st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
3857
) as geo
)
select
name,
geom
from
nyc_building_footprints,
a
where
st_dwithin(geom_3857, a.geo, 200)
create index geom_3857_idx on
nyc_building_footprints using gist(geom_3857)
with a as (
select
st_transform(
st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
3857
) as geo
)
select
name,
geom
from
nyc_building_footprints,
a
where
st_dwithin(
geom_3857,
(
select
geo
from
a
),
10000
)
select
a.ogc_fid,
a.health,
a.spc_common,
b.neighborhood
from
nyc_2015_tree_census a,
nyc_neighborhoods b
where
st_intersects(a.geom, b.geom)
and a.spc_common ilike '%maple%'
select
a.ogc_fid,
a.health,
a.spc_common,
b.neighborhood
from
nyc_2015_tree_census a
join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
and a.spc_common ilike '%maple%'
create table nyc_2010_neighborhoods_subdivide as
select
st_subdivide(geom) as geom,
neighborhood
from
nyc_neighborhoods
with trees as (
select
ogc_fid,
health,
spc_common,
geom
from
nyc_2015_tree_census
where
spc_common ilike '%maple%'
)
select
trees.ogc_fid,
trees.health,
trees.spc_common,
b.neighborhood
from
trees
join nyc_neighborhoods_subdivide b on st_intersects(trees.geom, b.geom)
create index nyc_neighborhoods_subdivide_geom_idx
n nyc_neighborhoods_subdivide using gist(geom)
cluster nyc_neighborhoods_subdivide using nyc_neighborhoods_subdivide_geom_idx;
cluster nyc_2015_tree_census using nyc_2015_tree_census_geom_geom_idx;
select
count(a.ogc_fid) filter (
where
a.spc_common ilike '%maple%'
) :: numeric / count(a.ogc_fid) :: numeric as percent_trees,
count(a.ogc_fid) filter (
where
a.spc_common ilike '%maple%'
) as count_maples,
b.neighborhood
from
nyc_2015_tree_census a
join nyc_neighborhoods_subdivide b on st_intersects(a.geom, b.geom)
group by
b.neighborhood
select
st_difference(a.geom, st_transform(b.geom, 4326)) as geom
from
nyc_neighborhoods a,
nyc_zips b
where
b.zipcode = '10014'
and a.neighborhood = 'West Village'
select
st_intersection(a.geom, st_transform(b.geom, 4326)) as geom
from
nyc_neighborhoods a,
nyc_zips b
where
b.zipcode = '10003'
and a.neighborhood = 'Gramercy'
select
st_intersection(a.geom, st_transform(b.geom, 4326)) as geom
from
nyc_neighborhoods a,
nyc_zips b
where
b.zipcode = '10003'
and a.neighborhood = 'Gramercy'
union
select
geom
from
nyc_neighborhoods
where
neighborhood = 'Gramercy'
union
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10003'
-- Query all the street segments between W 39 ST and BANJ ST
with a as (
select
st_union(geom) as geom
from
nyc_bike_routes
where
fromstreet IN ('W HOUSTON ST', 'BANK ST')
or tostreet IN ('BANK ST', 'W 39 ST', 'W HOUSTON ST')
)
select
st_split(
-- Select the geometgry for the West Village
(
select
st_transform(geom, 4326)
from
nyc_neighborhoods
where
neighborhood = 'West Village'
),
-- Split it with our geometry in our CTE above
(
select
geom
from
a
)
)
select
st_subdivide(st_transform(geom, 4326), 50) as geom
from
nyc_neighborhoods
where
neighborhood = 'West Village'
select
st_union(geom) as geom
from
nyc_neighborhoods
where
st_intersects(
geom,
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10009'
)
)
alter table
nyc_311
add
column geom geometry;
update
nyc_311
set
geom = buildpoint(longitude, latitude, 4326);
create
or replace view nyc_311_dbscan_noise as
-- Create the clustering functon using the Window funciton syntax
select
id,
ST_ClusterDBSCAN(st_transform(geom, 3857), 30, 30) over () AS cid,
geom
from
nyc_311
where
-- Find just the complaints with "noise" in the description
-- and that are in zip code 10009
st_intersects(
geom,
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10009'
)
)
and complaint_type ilike '%noise%'
create
or replace view nyc_311_kmeans_noise as
-- Create the clustering functon using the Window funciton syntax
select
id,
ST_ClusterKMeans(st_transform(geom, 3857), 7, 1609) over () AS cid,
geom
from
nyc_311
where
-- Find just the complaints with "noise" in the description
-- and that are in zip code 10009
st_intersects(
geom,
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10009'
)
)
and complaint_type ilike '%noise%'
create table nyc_311_clusterwithin_noise as
-- Find the 311 calls in the 10009 zip code
with a as (
select
geom
from
nyc_311
where
st_intersects(
geom,
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10009'
)
)
and complaint_type ilike '%noise%'
),
-- In CTE "b", we have to unnest the results since it returns
-- geometries in an array
b as (
select
st_transform(
unnest(ST_ClusterWithin(st_transform(geom, 3857), 25)),
4326
) AS geom
from
a
)
-- row_number() over() creates an id for each row starting at 1
select
row_number() over() as id,
geom
from
b
select
zipcode,
st_transform(geom, 4326)
from
nyc_zips
where
-- Finds all of the zip codes that intersect the bounding box
-- of the East Village
st_transform(geom, 4326) && (
select
geom
from
nyc_2010_neighborhoods
where
ntaname = 'East Village'
)
UNION
-- Query to show the East Village bounding box on the map using ST_Envelope
select
'None' as zipcode,
st_envelope(
(
select
geom
from
nyc_2010_neighborhoods
where
ntaname = 'East Village'
)
)
select
geom
from
nyc_neighborhoods
where
geom &< (
select
geom
from
nyc_neighborhoods
where
neighborhood = 'East Village'
)
with ev as (
select
geom
from
nyc_neighborhoods
where
neighborhood = 'East Village'
),
ues as (
select
geom
from
nyc_neighborhoods
where
neighborhood = 'Upper East Side'
)
select
ev.geom <-> ues.geom,
st_distance(ev.geom, use.geom)
from
ev,
ues
with ev as (
select
geom :: geography
from
nyc_neighborhoods
where
neighborhood = 'East Village'
),
ues as (
select
geom :: geography
from
nyc_neighborhoods
where
neighborhood = 'Upper East Side'
)
select
ev.geom <-> ues.geom as new_operator,
st_distance(ev.geom, ues.geom)
from
ev,
ues
select
spc_common,
case
when spc_common ilike '%oak%' then 'Oak'
when spc_common ilike '%maple%' then 'Maple'
when spc_common ilike '%pine%' then 'Pine'
else NULL
end as tree_type
from
nyc_2015_tree_census
limit
100
create temporary table stadiums_matrix as with stadiums as (
select
'Citi Field' as stadium,
buildpoint(-73.845833, 40.756944, 4326) as geom
union
select
'Yankees Stadium' as stadium,
) buildpoint(-73.926389, 40.829167, 4326) as geom
select
a.stadium,
b.neighborhood,
st_distance(st_centroid(b.geom), a.geom)
from
stadiums a,
nyc_neighborhoods b
-- Find all the rows from the stadiums matrix for Citi Field
with mets as (
select
*
from
stadiums_matrix
where
stadium = 'Citi Field'
),
-- Find all the rows from the stadiums matrix for Yankees Stadium
yankees as (
select
*
from
stadiums_matrix
where
stadium = 'Yankees Stadium'
)
select
a.neighborhood,
b.st_distance as mets,
c.st_distance as yankees
from
nyc_neighborhoods a
join mets b using (neighborhood)
join yankees c using (neighborhood)
with mets as (
select
*
from
stadiums_matrix
where
stadium = 'Citi Field'
),
yankees as (
select
*
from
stadiums_matrix
where
stadium = 'Yankees Stadium'
)
select
a.neighborhood,
a.geom,
b.st_distance as mets,
c.st_distance as yankees
from
nyc_neighborhoods a
join mets b using (neighborhood)
join yankees c using (ntaname)
where
c.st_distance < b.st_distance
with mets as (
select
*
from
stadiums_matrix
where
stadium = 'Citi Field'
),
yankees as (
select
*
from
stadiums_matrix
where
stadium = 'Yankees Stadium'
)
select
a.ntaname,
a.geom,
b.st_distance as mets,
c.st_distance as yankees
from
nyc_neighborhoods a
join mets b using (neighborhood)
join yankees c using (neighborhood)
where
b.st_distance < c.st_distance
select
st_makeenvelope(-74.047196, 40.679654, -73.906769, 40.882012, 4326)
select
st_envelope(st_transform(geom, 4326)) as geom
from
nyc_zips
where
zipcode = '11231'
select
st_collect(st_transform(geom, 4326)) as geom
from
nyc_zips
where
zipcode = '11231'
select
*
from
nyc_bike_routes
order by
st_length(geom) desc
limit
1
select
st_lineinterpolatepoint(st_linemerge(geom), 0.5) as geom
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_length(st_transform(geom, 3857))
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_lineinterpolatepoint(
-- This is the line created from ST_LineMerge
st_linemerge(geom),
-- Here we divide 500 by the total length of the route
(
500 / (
select
st_length(st_transform(geom, 3857))
from
nyc_bike_routes
where
ogc_fid = 20667
)
)
) as geom
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_lineinterpolatepoints(
-- Our merged geometry
st_linemerge(geom),
-- Dividing the length of the route by 75
(
75 / (
select
st_length(st_transform(geom, 3857))
from
nyc_bike_routes
where
ogc_fid = 20667
)
)
) as geom
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_intersection(
st_transform(geom, 4326),
st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
) as geom
from
nyc_zips
select
st_difference(
st_transform(geom, 4326),
st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
) as geom
from
nyc_zips
select
spc_common,
case
when spc_common ilike '%oak%' then 'Oak'
when spc_common ilike '%maple%' then 'Maple'
when spc_common ilike '%pine%' then 'Pine'
else NULL
end as tree_type
from
nyc_2015_tree_census
limit
100
create temporary table stadiums_matrix as with stadiums as (
select
'Citi Field' as stadium,
buildpoint(-73.845833, 40.756944, 4326) as geom
union
select
'Yankees Stadium' as stadium,
) buildpoint(-73.926389, 40.829167, 4326) as geom
select
a.stadium,
b.neighborhood,
st_distance(st_centroid(b.geom), a.geom)
from
stadiums a,
nyc_neighborhoods b
-- Find all the rows from the stadiums matrix for Citi Field
with mets as (
select
*
from
stadiums_matrix
where
stadium = 'Citi Field'
),
-- Find all the rows from the stadiums matrix for Yankees Stadium
yankees as (
select
*
from
stadiums_matrix
where
stadium = 'Yankees Stadium'
)
select
a.neighborhood,
b.st_distance as mets,
c.st_distance as yankees
from
nyc_neighborhoods a
join mets b using (neighborhood)
join yankees c using (neighborhood)
with mets as (
select
*
from
stadiums_matrix
where
stadium = 'Citi Field'
),
yankees as (
select
*
from
stadiums_matrix
where
stadium = 'Yankees Stadium'
)
select
a.neighborhood,
a.geom,
b.st_distance as mets,
c.st_distance as yankees
from
nyc_neighborhoods a
join mets b using (neighborhood)
join yankees c using (ntaname)
where
c.st_distance < b.st_distance
with mets as (
select
*
from
stadiums_matrix
where
stadium = 'Citi Field'
),
yankees as (
select
*
from
stadiums_matrix
where
stadium = 'Yankees Stadium'
)
select
a.ntaname,
a.geom,
b.st_distance as mets,
c.st_distance as yankees
from
nyc_neighborhoods a
join mets b using (neighborhood)
join yankees c using (neighborhood)
where
b.st_distance < c.st_distance
select
st_makeenvelope(-74.047196, 40.679654, -73.906769, 40.882012, 4326)
select
st_envelope(st_transform(geom, 4326)) as geom
from
nyc_zips
where
zipcode = '11231'
select
st_collect(st_transform(geom, 4326)) as geom
from
nyc_zips
where
zipcode = '11231'
select
*
from
nyc_bike_routes
order by
st_length(geom) desc
limit
1
select
st_lineinterpolatepoint(st_linemerge(geom), 0.5) as geom
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_length(st_transform(geom, 3857))
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_lineinterpolatepoint(
-- This is the line created from ST_LineMerge
st_linemerge(geom),
-- Here we divide 500 by the total length of the route
(
500 / (
select
st_length(st_transform(geom, 3857))
from
nyc_bike_routes
where
ogc_fid = 20667
)
)
) as geom
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_lineinterpolatepoints(
-- Our merged geometry
st_linemerge(geom),
-- Dividing the length of the route by 75
(
75 / (
select
st_length(st_transform(geom, 3857))
from
nyc_bike_routes
where
ogc_fid = 20667
)
)
) as geom
from
nyc_bike_routes
where
ogc_fid = 20667
select
st_intersection(
st_transform(geom, 4326),
st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
) as geom
from
nyc_zips
select
st_difference(
st_transform(geom, 4326),
st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
) as geom
from
nyc_zips
select
st_generatepoints(st_transform(geom, 4326), 50) as geom
from
nyc_zips
where
zipcode = '10001'
select
(
st_dump(
st_generatepoints(st_transform(geom, 4326), 5000)
)
).geom as geom
from
nyc_zips
where
zipcode = '10001'
select
st_transform(geom, 4326) as geom
from
nyc_zips
where
zipcode = '11101'
select
(
st_dump(
st_generatepoints(st_transform(geom, 4326), 5000)
)
).geom as geom
from
nyc_zips
where
zipcode = '11101'
-- The "points" CTE generates 5,000 random points in the 11101 postal code
with points as (
select
(
st_dump(
st_generatepoints(st_transform(geom, 4326), 5000)
)
).geom as geom
from
nyc_zips
where
zipcode = '11101'
),
-- Create 6 even clusters using ST_ClusterKMeans
cluster as (
select
geom,
st_clusterkmeans(geom, 6) over () AS cluster
from
points
)
-- Group or collect each cluster and find the centroid
select
st_centroid(
st_collect(geom)
) as geom,
cluster
from
cluster
group by
cluster
with points as (
select
(
st_dump(
st_generatepoints(st_transform(geom, 4326), 5000)
)
).geom as geom
from
nyc_zips
where
zipcode = '11101'
),
cluster as (
select
geom,
st_clusterkmeans(geom, 6) over () AS cluster
from
points
),
centroid as (
select
st_centroid(st_collect(geom)) as geom,
cluster
from
cluster
group by
cluster
)
-- In this step we collect the centroids
-- then create Voronoi polygons, then extract
-- each individual polygon
select
(
st_dump(st_voronoipolygons(st_collect(geom)))
).geom AS geom
from
centroid
with points as (
select
(
st_dump(
st_generatepoints(st_transform(geom, 4326), 5000)
)
).geom as geom
from
nyc_zips
where
zipcode = '11101'
),
cluster as (
select
geom,
st_clusterkmeans(geom, 6) over () AS cluster
from
points
),
centroid as (
select
st_centroid(st_collect(geom)) as geom,
cluster
from
cluster
group by
cluster
),
voronoi as (
select
(
st_dump(st_voronoipolygons(st_collect(geom)))
).geom AS geom
from
centroid
)
-- In the last step, we find the intersection (or clip)
-- the 11101 zip code and the Voronoi polygons
select
st_intersection(
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '11101'
),
geom
)
from
voronoi
-- Setting up our function and inputs
create
or replace function st_equalarea(
seed_points int,
tablename text,
polygons int,
unique_id text
)
-- Define that the function returns a table and the return values
returns table (id varchar, geom geometry) language plpgsql
as $$
begin
return query
-- This will run the string as a query, filling in the values
execute format(
'with points as (select %s as id,
(st_dump( st_generatepoints( geom , %s) )).geom as geom from %s),
cluster as (select geom, id, st_clusterkmeans(geom, %s)
over (partition by id) AS cluster from points),
centroid as (select id, st_centroid(st_collect(geom)) as geom,
cluster from cluster group by cluster, id),
voronoi as (select id, (st_dump( st_voronoipolygons( st_collect(geom) ) )).geom
AS voronoi_geom from centroid group by id)
select b.id, st_intersection(a.geom, b.voronoi_geom) as
geom from voronoi b join %s a on b.id = %s;',
-- These values will replace each instance of %s in the order they appear
unique_id,
seed_points,
tablename,
polygons,
tablename,
unique_id
);
end;
$$
select
*
from
st_equalarea(1000, 'nyc_zips', 6, 'zipcode')
select
id,
st_transform(geom, 4326) as geom
from
st_equalarea(1000, 'nyc_zips', 6, 'zipcode')
create
or replace function st_equalareawithinput(
seed_points int,
tablename text,
polygons int,
unique_id text
) returns table (id varchar, geom geometry) language plpgsql as $$ begin return query execute format(
'with points as (select %s as id, (st_dump( st_generatepoints( geom , %s) )).geom
as geom from %s),
area as (select st_area(geom) as area, zipcode from %s),
cluster as (select points.geom, points.id,
st_clusterkmeans(points.geom, ceil((a.area/(%s)))::int) over (partition by id) AS cluster
from points left join area a on a.zipcode = points.id),
centroid as (select id, st_centroid(st_collect(geom)) as geom,
cluster from cluster group by cluster, id),
voronoi as (select id,(st_dump( st_voronoipolygons( st_collect(geom) ) )).geom AS voronoi_geom
from centroid group by id) select b.id, st_intersection(a.geom, b.voronoi_geom) as geom
from voronoi b join %s a on b.id = %s;',
unique_id,
seed_points,
tablename,
tablename,
polygons,
tablename,
unique_id
);
end;
$$
create table nyc_zips_50_acres as
select
id,
st_transform(geom, 4326) as geom
from
st_equalareawithinput(1000, 'nyc_zips', (43560 * 50), 'zipcode')
-- Create a point at the Plaza Hotel
with point as (
select
buildpoint(-73.9730423, 40.7642784, 4326) as geom
),
-- Find all the pickups within 50 meters of the Plaza
-- on June 1st, 2016 and calculate distance with a cross
-- join. Since it is just one point the cross join is acceptable
-- since it is just one row in that table.
start as (
select
*
from
nyc_yellow_taxi_0601_0615_2016,
point
where
st_dwithin(pickup :: geography, point.geom :: geography, 50)
and pickup_datetime :: date = '06-01-2016'
)
select
ogc_fid,
trip_distance,
total_amount,
-- Create a line from the Plaza (in the subquery in the second argument)
-- and all the dropoff locations
st_makeline(
dropoff,
(
select
geom
from
point
)
) as geom
from
start
select
a.id,
st_makeline(a.geom, b.geom) as geom
from
destinations a
left join origins b using (id)
select
geom,
health,
spc_common
from
nyc_2015_tree_census
order by
st_distance(buildpoint(-73.977733, 40.752273, 4326), geom)
limit
3
select
geom,
health,
spc_common
from
nyc_2015_tree_census
order by
buildpoint(-73.977733, 40.752273, 4326) <-> geom
limit
3
-- Create a point at John's of Bleeker Street
with point as (
select
buildpoint(-74.003544, 40.7316243, 4326) :: geography as geog
),
-- Find all the buildings that are within 1 kilometer of John's
buildings as (
select
geom,
name,
mpluto_bbl
from
nyc_building_footprints
where
st_dwithin(
geom::geography,
(
select
geog
from
point
),
1000
)
)
-- Selects three columns from the "buildings" CTE
-- And one from the cross join lateral
select
geom,
name,
mpluto_bbl,
nearest.distance
from
buildings
-- This join selects the distance to the nearest fire hydrant laterally
-- or for each row in the "buildings" dataset. As you can see it uses
-- columns from the outside query, and limits the results to 1
cross join lateral (
select
unitid,
st_distance(geom :: geography, buildings.geom :: geography) as distance
from
nyc_fire_hydrants
order by
geom <-> buildings.geom
limit
1
) nearest
create table nearest_hydrant_pizza as with point as (
select
buildpoint(-74.003544, 40.7316243, 4326) :: geography as geog
),
buildings as (
select
geom,
name,
mpluto_bbl
from
nyc_building_footprints
where
st_dwithin(
geom :: geography,
(
select
geog
from
point
),
1000
)
)
select
geom,
name,
mpluto_bbl,
nearest.distance
from
buildings
cross join lateral (
select
unitid,
st_distance(geom :: geography, buildings.geom :: geography) as distance
from
nyc_fire_hydrants
order by
geom <-> buildings.geom
limit
1
) nearest
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \ dbname=gis port=25432" \
street_centerlines.geojson \
-nln nyc_street_centerlines -lco GEOMETRY_NAME=geom
select
*
from
nyc_yellow_taxi_0601_0615_2016
where
st_dwithin(
pickup :: geography,
buildpoint(-73.987224, 40.733342, 4326) :: geography,
300
)
and pickup_datetime between '2016-06-02 9:00:00+00'
and '2016-06-02 18:00:00+00'
-- Select all pickups within 300 meters of Union Square
-- between 9am and 6pm on June 2nd, 2016
with pickups as (
select
pickup,
tip_amount,
total_amount
from
nyc_yellow_taxi_0601_0615_2016
where
st_dwithin(
pickup :: geography,
buildpoint(-73.987224, 40.733342, 4326) :: geography,
300
)
and pickup_datetime between '2016-06-02 9:00:00+00'
and '2016-06-02 18:00:00+00'
)
-- Find the nearest road to each point using a
-- cross join lateral
select
a.*,
street.name,
street.ogc_fid,
street.geom
from
pickups a
cross join lateral (
select
ogc_fid,
full_stree as name,
geom
from
nyc_street_centerlines
order by
a.pickup <-> geom
limit
1
) street
with pickups as (
select
pickup,
tip_amount,
total_amount
from
nyc_yellow_taxi_0601_0615_2016
where
st_dwithin(
pickup :: geography,
buildpoint(-73.987224, 40.733342, 4326) :: geography,
300
)
and pickup_datetime between '2016-06-02 9:00:00+00'
and '2016-06-02 18:00:00+00'
),
nearest as (
select
a.*,
street.name,
street.ogc_fid,
street.geom
from
pickups a
cross join lateral (
select
ogc_fid,
full_stree as name,
geom
from
nyc_street_centerlines
order by
a.pickup <-> geom
limit
1
) street
)
select
a.*,
-- Create a line between the original point and the new snapped point
st_makeline(
pickup,
st_lineinterpolatepoint(
st_linemerge(b.geom),
st_linelocatepoint(st_linemerge(b.geom), pickup)
)
) as line,
-- Add a column for the snapped point
st_lineinterpolatepoint(
st_linemerge(b.geom),
st_linelocatepoint(st_linemerge(b.geom), pickup)
) as snapped
from
nearest a
join nyc_street_centerlines b using (ogc_fid)
create
or replace view nyc_taxi_union_square as with pickups as (
select
pickup,
tip_amount,
total_amount
from
nyc_yellow_taxi_0601_0615_2016
where
st_dwithin(
pickup :: geography,
buildpoint(-73.987224, 40.733342, 4326) :: geography,
300
)
and pickup_datetime between '2016-06-02 9:00:00+00'
and '2016-06-02 18:00:00+00'
),
nearest as (
select
a.*,
street.name,
street.ogc_fid,
street.geom
from
pickups a
cross join lateral (
select
ogc_fid,
full_stree as name,
geom
from
nyc_street_centerlines
order by
a.pickup <-> geom
limit
1
) street
)
select
a.*,
st_makeline(
pickup,
st_lineinterpolatepoint(
st_linemerge(b.geom),
st_linelocatepoint(st_linemerge(b.geom), pickup)
)
) as line,
st_lineinterpolatepoint(
st_linemerge(b.geom),
st_linelocatepoint(st_linemerge(b.geom), pickup)
) as snapped
from
nearest a
join nyc_street_centerlines b using (ogc_fid)
create table nyc_taxi_union_square_snapped as
select
snapped,
name,
ogc_fid
from
nyc_taxi_union_square;
create table nyc_taxi_union_square_roads as
select
geom,
name,
ogc_fid
from
nyc_taxi_union_square;
create table nyc_taxi_union_square_points as
select
pickup,
name,
ogc_fid
from
nyc_taxi_union_square;
create table nyc_taxi_union_square_lines as
select
lines,
name,
ogc_fid
from
nyc_taxi_union_square;
-- Calculate the tip percentage for each trip with a total over $0
with a as (
select
*,
tip_amount /(total_amount - tip_amount) as tip_percent
from
nyc_taxi_union_square
where
total_amount > 0
)
-- Get the average tip percentage for each road segment
select
avg(a.tip_percent),
b.geom,
b.ogc_fid
from
a
join nyc_street_centerlines b using (ogc_fid)
group by
b.geom,
b.ogc_fid
create table nyc_taxi_union_square_tips as with a as (
select
*,
tip_amount / total_amount as tip_percent
from
nyc_taxi_union_square
where
total_amount > 0
)
select
avg(a.tip_percent),
b.geom,
b.ogc_fid
from
a
join nyc_street_centerlines b using (ogc_fid)
group by
b.geom,
b.ogc_fid
select
mode() within group (
ORDER BY
a.spc_common
) as most_common
from
nyc_2015_tree_census a
join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
where
b.neighborhood = 'East Village'
select
count(a.*),
a.spc_common
from
nyc_2015_tree_census a
join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
where
b.neighborhood = 'East Village'
group by
a.spc_common
order by
count(a.*) desc
with building as (
select
geom
from
nyc_building_footprints
where
name = 'Empire State Building'
)
select
st_shortestline(h.geom, b.geom)
from
nyc_fire_hydrants h,
building b
order by
h.geom <-> b.geom asc
limit
100
select
array(
select
geom
from
nyc_311
where
geom is not null
limit
10
)
select
st_makeline(
array(
select
geom
from
nyc_311
where
geom is not null
order by
id
limit
10
)
)
select
-- Clips the 100 meter buffer out of the 10001 zip code
st_difference(
st_transform(geom, 4326),
-- Creates a 100 meter buffer
st_buffer(
buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
100
) :: geometry
)
from
nyc_zips
where
zipcode = '10001'
with a as (
select
10001 as id,
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10001'
)
select
id,
-- Takes only the exterior perimiter of the polygon
st_exteriorring(geom) as geom
from
a
with a as (
select
10001 as id,
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10001'
)
select
id,
-- Creates a new polygon just from the exterior ring
-- which removes all holes
st_makepolygon(
st_exteriorring(geom)
) as geom
from
a
-- Creates a polygon with a hole in zip code 10001
with a as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10001'
),
-- Creates a polygon with a hole in zip code 10017
b as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10017'
),
-- Unions both polygons into a multi-polygon
c as (
select
1 as id,
st_union(a.geom, b.geom)
from
a,
b
)
select
*
from
c
with a as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10001'
),
b as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10017'
),
c as (
select
1 as id,
st_union(a.geom, b.geom) as geom
from
a,
b
)
select
id,
(st_dump(geom)).geom
from
c
with a as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10001'
),
b as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10017'
),
c as (
select
1 as id,
st_union(a.geom, b.geom) as geom
from
a,
b
)
select
id,
-- Takes the exterior ring from the geometry dump
st_exteriorring(
(st_dump(geom)).geom
)
from
c
with a as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10001'
),
b as (
select
st_difference(
st_transform(geom, 4326),
st_buffer(
buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
100
) :: geometry
) as geom
from
nyc_zips
where
zipcode = '10017'
),
c as (
select
1 as id,
st_union(a.geom, b.geom) as geom
from
a,
b
)
select
id,
-- Collect the individual geometries to turn them back into
-- one polygon
st_collect(st_makepolygon(geom)) as geom
from
(
select
id,
st_exteriorring((st_dump(geom)).geom) as geom
from
c
) s
group by
id;
select
*
from
st_maximuminscribedcircle(
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10001'
)
)
select
center
from
st_maximuminscribedcircle(
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10001'
)
)
select
st_buffer(center, radius)
from
st_maximuminscribedcircle(
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10001'
)
)
select
st_transform(center, 4326) as geom
from
st_maximuminscribedcircle(
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10001'
)
)
union
select
st_centroid(st_transform(geom, 4326)) as geom
from
nyc_zips
where
zipcode = '10001'
select
st_boundary(st_transform(geom, 4326))
from
nyc_zips
where
zipcode = '10001'
select
st_makeline(
st_pointn(
st_boundary(st_transform(z.geom, 4326)),
numbers.num
),
st_pointn(
st_boundary(st_transform(z.geom, 4326)),
numbers.num + 1
)
),
numbers.num
from
nyc_zips z
cross join lateral generate_series(1, st_npoints(z.geom) - 1) as numbers(num)
where
z.zipcode = '10001'
select
st_polygonize(
st_makeline(
st_pointn(
st_boundary(st_transform(z.geom, 4326)),
numbers.num
),
st_pointn(
st_boundary(st_transform(z.geom, 4326)),
numbers.num + 1
)
)
)
from
nyc_zips z
cross join lateral generate_series(1, st_npoints(z.geom) - 1) as numbers(num)
where
z.zipcode = '10001'
select
st_transform(
st_snaptogrid(st_transform(geom, 3857), 500, 1000),
4326
) as geom
from
nyc_311
limit
100000
select
st_delaunaytriangles(st_transform(geom, 4326))
from
nyc_zips
where
zipcode = '10009'
with a as (
select
(
-- Create a dump to return the individual triangles
st_dump(
-- Create the triangles like above
st_delaunaytriangles(st_transform(geom, 4326))
)
).geom
from
nyc_zips
where
zipcode = '10009'
)
-- Select and order by areas
select
a.geom,
st_area(geom) as area
from
a
order by
st_area(geom) desc
limit
10
-- Select all bike routes on Hudson Street
with lines as (
select
1 as id,
st_linemerge(st_union(geom)) as geom
from
nyc_bike_routes
where
street = 'HUDSON ST'
)
select
*
from
lines
with lines as (
select
1 as id,
ST_LineMerge(st_union(geom)) as geom
from
nyc_bike_routes
where
street = 'HUDSON ST'
),
-- Dump all of the points and find the length of the geometry
first as (
select
id,
st_dumppoints(geom) as dump,
st_length(geom) as len,
geom
from
lines
)
select
*
from
first
with lines as (
select
1 as id,
ST_LineMerge(st_union(geom)) as geom
from
nyc_bike_routes
where
street = 'HUDSON ST'
),
first as (
select
id,
st_dumppoints(geom) as dump,
st_length(geom) as len,
geom
from
lines
),
-- For each path, select the id, path, and a buffer
-- around the path point. Using ST_LineLocatePoint
-- we use the line geometry and the point to find
-- the distance along the line, then make it smaller
-- using the log of the length
second as (
select
id,
(dump).path [1],
st_buffer(
(dump).geom,
ST_LineLocatePoint(geom, (dump).geom) * log(len)
) as geom
from
first
)
select
*
from
second
with lines as (
select
1 as id,
st_linemerge(st_union(geom)) as geom
from
nyc_bike_routes
where
street = 'HUDSON ST'
),
first as (
select
id,
st_dumppoints(geom) as dump,
st_length(geom) as len,
geom
from
lines
),
second as (
select
id,
(dump).path [1],
st_buffer(
(dump).geom,
ST_LineLocatePoint(geom, (dump).geom) * len / 10
) as geom
from
first
),
-- Create a convex hull around the buffers by union-ing
-- all the buffers together. These are ordered using the
-- LEAD window function and partition
third as (
select
id,
st_convexhull(
st_union(
geom,
lead(geom) over(
partition by id
order by
id,
path
)
)
) as geom
from
second
)
select
id,
st_union(geom) as geom
from
third
group by
id
with lines as (
select
1 as id,
st_linemerge(st_union(geom)) as geom
from
nyc_bike_routes
where
street = 'HUDSON ST'
),
first as (
select
id,
st_dumppoints(geom) as dump,
st_length(geom) as len,
geom
from
lines
),
second as (
select
id,
(dump).path [1],
st_transform(
st_buffer(st_transform((dump).geom, 3857), random() * 100),
4326
) as geom
from
first
),
third as (
select
id,
st_convexhull(
st_union(
geom,
lead(geom) over(
partition by id
order by
id,
path
)
)
) as geom
from
second
)
select
id,
st_union(geom) as geom
from
third
group by
id
select
st_symdifference(
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'NoHo'
),
(
select
st_transform(geom, 4326)
from
nyc_zips
where
zipcode = '10012'
)
)
ogrinfo ACS_2021_5YR_BG_36_NEW_YORK.gdb ACS_2021_5YR_BG_36_NEW_YORK -geom=YES -so
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, b01001e1 as population, B01002e1 as age from X01_AGE_AND_SEX" \
-nln nys_2021_census_block_groups_pop -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \ dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, B19001e1 as income from X19_INCOME" \
-nln nys_2021_census_block_groups_income -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select SHAPE as geom, GEOID as geoid from ACS_2021_5YR_BG_36_NEW_YORK" \
-nln nys_2021_census_block_groups_geom -lco GEOMETRY_NAME=geom
update nyc_neighborhoods set geom = st_makevalid(geom) where st_isvalid(geom) is false
select
neighborhood,
-- These are the values from the cross join lateral
a.pop,
a.count,
a.avg
from
nyc_neighborhoods
cross join lateral (
select
-- This selects the sum of all the intersecting areas
-- populations using the proportional overlap calculation
sum(
population * (
st_area(st_intersection(geom, nyc_neighborhoods.geom)) / st_area(geom)
)
) as pop,
count(*) as count,
-- This selects the average area overlapping area
-- of all the intersecting areas
avg(
(
st_area(st_intersection(nyc_neighborhoods.geom, geom)) / st_area(geom)
)
) as avg
from
nys_2021_census_block_groups
where
left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
and st_intersects(nyc_neighborhoods.geom, geom)
) a
order by
a.pop desc
ogrinfo ACS_2021_5YR_BG_36_NEW_YORK.gdb ACS_2021_5YR_BG_36_NEW_YORK -geom=YES -so
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, b01001e1 as population, B01002e1 as age from X01_AGE_AND_SEX" \
-nln nys_2021_census_block_groups_pop -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \ dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, B19001e1 as income from X19_INCOME" \
-nln nys_2021_census_block_groups_income -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select SHAPE as geom, GEOID as geoid from ACS_2021_5YR_BG_36_NEW_YORK" \
-nln nys_2021_census_block_groups_geom -lco GEOMETRY_NAME=geom
update nyc_neighborhoods set geom = st_makevalid(geom) where st_isvalid(geom) is false
select
neighborhood,
-- These are the values from the cross join lateral
a.pop,
a.count,
a.avg
from
nyc_neighborhoods
cross join lateral (
select
-- This selects the sum of all the intersecting areas
-- populations using the proportional overlap calculation
sum(
population * (
st_area(st_intersection(geom, nyc_neighborhoods.geom)) / st_area(geom)
)
) as pop,
count(*) as count,
-- This selects the average area overlapping area
-- of all the intersecting areas
avg(
(
st_area(st_intersection(nyc_neighborhoods.geom, geom)) / st_area(geom)
)
) as avg
from
nys_2021_census_block_groups
where
left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
and st_intersects(nyc_neighborhoods.geom, geom)
) a
order by
a.pop desc
ogr2ogr \
- f PostgreSQL PG :"host=localhost user=docker password=docker \
dbname=gis port=25432" Subway_Entrances.geojson \
- nln nyc_subway_enterances - lco GEOMETRY_NAME = geom
select
sw.name,
sw.objectid,
near.tree_id,
near.spc_common,
near.distance
from
nyc_subway_enterances sw
-- Since this is a cross join it will join to every possible combination
-- Since we have a limit of 5 below, it will join it to each row in the
-- main subway enterances table 5 times
cross join lateral (
select
tree_id,
spc_common,
st_distance(sw.geom :: geography, geom :: geography) as distance
from
nyc_2015_tree_census
order by
sw.geom <-> geom
limit
5
) near
select
sw.name,
sw.objectid,
near.tree_id,
near.spc_common,
near.distance,
near.ranking
from
nyc_subway_enterances sw
cross join lateral (
select
tree_id,
spc_common,
st_distance(sw.geom :: geography, geom :: geography) as distance,
row_number() over() as ranking
from
nyc_2015_tree_census
order by
sw.geom <-> geom
limit
5
) near
limit
50
select
sw.name,
sw.objectid,
near.tree_id,
near.spc_common,
near.distance,
rank() over(
-- The partition will perform the ranking for each
-- subway enterance ID for the 5 matching trees linked
-- to that station ID
partition by sw.objectid
-- The ranking will be ordered by the distance
order by
distance desc
)
from
nyc_subway_enterances sw
cross join lateral (
select
tree_id,
spc_common,
st_distance(sw.geom :: geography, geom :: geography) as distance
from
nyc_2015_tree_census
order by
sw.geom <-> geom
limit
5
) near
limit
50
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" planimetrics_2018_roofprints.json \
-nln denver_buildings -lco GEOMETRY_NAME=geom
select
geom,
bldg_heigh,
ground_ele,
gid
from
denver_buildings
order by
random()
limit
2
with a as (
select
geom,
bldg_heigh,
ground_ele,
gid
from
denver_buildings
order by
random()
limit
2
)
select
-- This will create an array for the two buildings above.
-- One below for the centroid
array_agg(st_centroid(st_transform(geom, 4326))) as geom,
-- One for the building height + ground height
array_agg(bldg_heigh + ground_ele) as height,
-- One for the building IDs
array_agg(gid) as id
from
a
with a as (
select
geom,
bldg_heigh,
ground_ele,
gid
from
denver_buildings
order by
random()
limit
2
), bldgs as (
select
array_agg(st_centroid(st_transform(geom, 4326))) as geom,
array_agg(bldg_heigh + ground_ele) as height,
array_agg(gid) as id
from
a
),
line as (
select
-- Here we create a line between the two centroids
-- using both the geometries in individual subqueries
st_makeline(
(
select
geom [1]
from
bldgs
),
(
select
geom [2]
from
bldgs
)
)
)
select
*
from
line
with a as (
select
geom,
bldg_heigh,
ground_ele,
gid
from
denver_buildings
order by
random()
limit
2
), bldgs as (
select
array_agg(st_centroid(st_transform(geom, 4326))) as geom,
array_agg(bldg_heigh + ground_ele) as height,
array_agg(gid) as id
from
a
),
line as (
-- Here we use a simple select statement rather tha subqueries
-- like the previous step to grab the height column too
select
st_makeline(geom [1], geom [2]) as geom,
height
from
bldgs
)
select
-- This will return all the buildings higher than the line
b.gid,
b.bldg_heigh + b.ground_ele as height,
st_transform(b.geom, 4326),
line.height
from
denver_buildings b
join line on st_intersects(line.geom, st_transform(b.geom, 4326))
where
-- This finds any building taller than either of our two buildings
b.bldg_heigh + b.ground_ele < line.height [1]
or b.bldg_heigh + b.ground_ele < line.height [2]
alter table
denver_buildings
add
column geom_z geometry
update
denver_buildings
set
geom_z = st_force3d(
-- First value is the building geometry in EPSG 4326
st_transform(geom, 4326),
-- This is our Z or height value, the building height
-- plus the ground elevation
bldg_heigh + ground_ele
)
-- Find our first building
with a as (
select
geom_z,
gid
from
denver_buildings
limit
1 offset 100
),
-- Find a building with its ID and GeometryZ
-- within 2 kilometers
b as (
select
geom_z,
gid
from
denver_buildings
where
st_dwithin(
st_transform(geom, 3857),
(
select
st_transform(geom, 3857)
from
a
),
2000
)
limit
1
),
-- Use UNION to create a single table with matching columns
c as (
select
*
from
a
union
select
*
from
b
),
-- Store the geometries and and IDs in arrays
bldgs as (
select
array_agg(st_centroid(geom_z)) as geom,
array_agg(gid) as id
from
c
),
-- This query finds all the buildings within 3 kilometers of each building
denver as (
select
st_transform(geom, 3857) as geom,
geom_z,
gid
from
denver_buildings
where
st_dwithin(
-- We union the two points and turn them back into 2D
-- Geometries so we can run the query to find all
-- buildings with 3 kilometers one time
st_union(
(
select
st_force2d(st_transform(geom [1], 3857))
from
bldgs
),
(
select
st_force2d(st_transform(geom [2], 3857))
from
bldgs
)
),
st_transform(geom, 3857),
3000
)
)
select
d.gid,
st_transform(d.geom, 4326) as geom
from
denver d
-- Now we can use our ST_3DIntersects funciton to find
-- all the buildings crossing our path
join bldgs on st_3dintersects(
-- The path can be built using the same ST_MakeLine
-- function we have been using
st_makeline(bldgs.geom [1], bldgs.geom [2]),
d.geom_z
)
create table nyc_2021_census_block_groups_morans_i as
select
*
from
nys_2021_census_block_groups
where
left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
and population > 0
with a as (
select
*
from
nyc_2021_census_block_groups
where
geoid = '360470201002'
)
select
bgs.geoid,
-- Finds the number of points of the
-- portion of the border that intersects
st_npoints(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
)
) as intersected_points,
-- Finds the length of the
-- portion of the border that intersects
st_length(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
) :: geography
) as length,
geom
from
nyc_2021_census_block_groups bgs
where
st_intersects(
(
select
geom
from
a
),
bgs.geom
)
and bgs.geoid != (
select
geoid
from
a
)
with a as (
select
*
from
nyc_2021_census_block_groups
where
geoid = '360470201002'
)
select
bgs.*,
st_npoints(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
)
) as intersected_points,
st_length(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
) :: geography
) as length,
geom
from
nyc_2021_census_block_groups bgs
where
-- Only select polygons that have a border overlap
-- of 2 points or more
st_npoints(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
)
) > 1
and bgs.geoid != (
select
geoid
from
a
)
with a as (
select
*
from
nyc_2021_census_block_groups
where
geoid = '360470201002'
)
select
bgs.*,
st_npoints(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
)
) as intersected_points,
st_length(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
) :: geography
) as length,
geom
from
nyc_2021_census_block_groups bgs
where
-- Only select polygons that have a border overlap
-- of 100 meters or more
st_length(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
) :: geography
) > 100
and bgs.geoid != (
select
geoid
from
a
)
with a as (
select
*
from
nyc_2021_census_block_groups
where
geoid = '360470201002'
)
select
bgs.geoid,
geom,
st_npoints(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
)
) as intersected_points,
st_length(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
) :: geography
) as length,
(
-- Finding the length of the shared border
st_length(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
) :: geography
-- Dividing that by the perimiter of the source polygon
) / st_perimeter(
(
select
geom :: geography
from
a
)
)
) as percent_of_source
from
nyc_2021_census_block_groups bgs
where
-- Finding touching polygons that share more than 25% of
-- the source polygon's border
(
st_length(
st_intersection(
bgs.geom,
(
select
geom
from
a
)
) :: geography
) / st_perimeter(
(
select
geom :: geography
from
a
)
)
) >.25
and bgs.geoid != (
select
geoid
from
a
)
alter table
nyc_building_footprints
add
column h3 text
update
nyc_building_footprints
set
h3 = h3_lat_lng_to_cell(
st_centroid(
st_transform(geom, 4326)
), 10)
select
bin,
st_transform(geom, 4326) as geom
from
nyc_building_footprints b
where
-- Returns all the H3 cells that meet the condition in the subquery
h3 in (
select
h3
from
nyc_building_footprints
-- First we group the H3 cells to use the aggregate function
group by
h3
-- This finds all the H3 cells that have an aggregate
-- count greater than 1
having
count(*) = 1
)
select
bin,
closest.distance,
st_transform(geom, 4326) as geom
from
nyc_building_footprints b
cross join lateral (
select
-- Finding the distance to the nearest building in meters
st_distance(
st_transform(geom, 3857),
st_transform(b.geom, 3857)
) as distance
from
nyc_building_footprints
-- This removes the ID of the building we want to analyze
where
bin != b.bin
order by
geom <-> b.geom
limit
1
) closest
where
h3 in (
select
h3
from
nyc_building_footprints
group by
h3
having
count(*) = 1
)
order by
closest.distance desc
select
*
from
nyc_building_footprints
where
bin = '2127308'
create
or replace function st_kdensity(ids bigint [], geoms geometry [])
returns table(id bigint, geom geometry, kdensity integer) as $$ declare mc geometry;
c integer;
k numeric;
begin mc := st_centroid(st_collect(geoms));
c := array_length(ids, 1);
k := sqrt(1 / ln(2));
return query with dist as (
select
t.gid,
t.g,
st_distance(t.g, mc) as distance
from
unnest(ids, geoms) as t(gid, g)
),
md as (
select
percentile_cont(0.5) within group (
order by
distance
) as median
from
dist
),
sd as (
select
sqrt(
sum((st_x(g) - st_x(mc)) ^ 2) / c + sum((st_y(g) - st_y(mc)) ^ 2) / c
) as standard_distance
from
dist
),
sr as (
select
0.9 * least(sd.standard_distance, k * md.median) * c ^(-0.2) as search_radius
from
sd,
md
)
select
gid as id,
g as geom,
kd :: int as kdensity
from
sr,
dist as a,
lateral(
select
count(*) as kd
from
dist _b
where
st_dwithin(a.g, _b.g, sr.search_radius)
) b;
end;
$$ language plpgsql immutable parallel safe;
create table east_village_kde as WITH a AS(
SELECT
array_agg(ogc_fid) as ids,
array_agg(geom) as geoms
FROM
nyc_2015_tree_census
where
st_intersects(
geom,
(
select
geom
from
nyc_neighborhoods
where
neighborhood = 'East Village'
)
)
)
SELECT
b.*
FROM
a,
ST_KDensity(a.ids, a.geoms) b
with buildings_0 as(
select
t.geom
from
-- Find all the buildingswithin 2 kilometers of Times Square where
-- geometries are stored in an array
unnest(
(
select
array_agg(geom)
from
nyc_building_footprints
where
st_dwithin(
geom :: geography,
st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
2000
)
)
) as t(geom)
),
buildings_crop as(
-- Now only the buildings within 630 meters of Times Square
select
geom
from
buildings_0
where
st_dwithin(
st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
geom :: geography,
630
)
),
buildings as(
-- Union these two tables together
select
geom
from
buildings_crop
union
all
select
st_buffer(
st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
630
) :: geometry as geom
)
select
*
from
buildings
create
or replace function isovist(
-- Sets up the arguments for our function
in center geometry,
in polygons geometry [],
in radius numeric default 150,
in rays integer default 36,
in heading integer default -999,
in fov integer default 360
) returns geometry as $$ declare arc numeric;
-- Creates static variables for angle and geometry
angle_0 numeric;
geomout geometry;
-- Calculates the arc distance using the field of view (fov)
-- degrees and the number of rays
begin arc := fov :: numeric / rays :: numeric;
if fov = 360 then angle_0 := 0;
else angle_0 := heading - 0.5 * fov;
end if;
with buildings_0 as(
select
t.geom
from
unnest(polygons) as t(geom)
),
buildings_crop as(
select
geom
from
buildings_0
where
st_dwithin(center :: geography, geom :: geography, radius)
),
buildings as(
select
geom
from
buildings_crop
union
all
select
st_buffer(center :: geography, radius) :: geometry as geom
),
rays as(
select
t.n as id,
st_setsrid(
st_makeline(
center,
st_project(
center :: geography,
radius + 1,
radians(angle_0 + t.n :: numeric * arc)
) :: geometry
),
4326
) as geom
from
generate_series(0, rays) as t(n)
),
intersections as(
select
r.id,
(
st_dump(st_intersection(st_boundary(b.geom), r.geom))
).geom as point
from
rays r
left join buildings b on st_intersects(b.geom, r.geom)
),
intersections_distances as(
select
id,
point as geom,
row_number() over(
partition by id
order by
center <-> point
) as ranking
from
intersections
),
intersection_closest as(
select
-1 as id,
case
when fov = 360 then null :: geometry
else center
end as geom
union
all (
select
id,
geom
from
intersections_distances
where
ranking = 1
order by
id
)
union
all
select
999999 as id,
case
when fov = 360 then null :: geometry
else center
end as geom
),
isovist_0 as(
select
st_makepolygon(st_makeline(geom)) as geom
from
intersection_closest
),
isovist_buildings as(
select
st_collectionextract(st_union(b.geom), 3) as geom
from
isovist_0 i,
buildings_crop b
where
st_intersects(b.geom, i.geom)
)
select
coalesce(st_difference(i.geom, b.geom), i.geom) into geomout
from
isovist_0 i,
isovist_buildings b;
return geomout;
end;
$$ language plpgsql immutable;
select
*
from
isovist(
-- This is our center point in Times Square
st_setsrid(st_makepoint(-73.985136, 40.758786), 4326),
(
-- Selects all the geometries
select
array_agg(geom)
from
nyc_building_footprints
where
st_dwithin(
geom :: geography,
st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
2000
)
),
300,
36
)
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis port=25432" nyc_pharmacies.geojson \
-nln nyc_pharmacies -lco GEOMETRY_NAME=geom
-- Selects necessary data for Duane Reade locations
with a as (
select
id,
amenity,
brand,
name,
geom
from
nyc_pharmacies
where
name ilike '%duane%'
),
-- Spatially join the Duane Reade stores to neighborhoods and adds neighborhood names
b as (
select
a.*,
b.neighborhood
from
a
join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
),
-- Creates a buffer for each store
c as (
select
id,
st_buffer(st_transform(geom, 3857), 800) as buffer,
neighborhood
from
b
),
-- Union the buffers together by neighborhood
d as (
select
st_transform(st_union(buffer), 4326) as geom,
neighborhood
from
c
group by
neighborhood
),
-- Caluclates the proportional population for each group of buffers
-- and also the area of the groupped buffers
e as (
select
d.*,
sum(
bgs.population * (
st_area(st_intersection(d.geom, bgs.geom)) / st_area(bgs.geom)
)
) as pop,
st_area(st_transform(d.geom, 3857)) as area
from
d
join nys_2021_census_block_groups bgs on st_intersects(bgs.geom, d.geom)
group by
d.geom,
d.neighborhood
)
-- Calculates the population density
select
neighborhood,
pop / area as potential
from
e
order by
pop / area desc
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis port=25432" nyc_pharmacies.geojson \
-nln nyc_pharmacies -lco GEOMETRY_NAME=geom
-- Selects necessary data for Duane Reade locations
with a as (
select
id,
amenity,
brand,
name,
geom
from
nyc_pharmacies
where
name ilike '%duane%'
),
-- Spatially join the Duane Reade stores to neighborhoods and adds neighborhood names
b as (
select
a.*,
b.neighborhood
from
a
join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
),
-- Creates a buffer for each store
c as (
select
id,
st_buffer(st_transform(geom, 3857), 800) as buffer,
neighborhood
from
b
),
-- Union the buffers together by neighborhood
d as (
select
st_transform(st_union(buffer), 4326) as geom,
neighborhood
from
c
group by
neighborhood
),
-- Caluclates the proportional population for each group of buffers
-- and also the area of the groupped buffers
e as (
select
d.*,
sum(
bgs.population * (
st_area(st_intersection(d.geom, bgs.geom)) / st_area(bgs.geom)
)
) as pop,
st_area(st_transform(d.geom, 3857)) as area
from
d
join nys_2021_census_block_groups bgs on st_intersects(bgs.geom, d.geom)
group by
d.geom,
d.neighborhood
)
-- Calculates the population density
select
neighborhood,
pop / area as potential
from
e
order by
pop / area desc
create table tree_similarity as
-- Finds the count of all trees in each neighborhood
with a as (
select
count(t.*) as total_trees,
n.neighborhood
from
nyc_2015_tree_census t
join nyc_neighborhoods n on st_intersects(t.geom, n.geom)
group by
n.neighborhood
)
-- Finds the count of each type of tree in each neighborhood
select
n.neighborhood,
(
count(t.*) filter (
where
t.spc_common ilike '%pine%'
) :: numeric / a.total_trees
) as pine,
(
count(t.*) filter (
where
t.spc_common ilike '%maple%'
) :: numeric / a.total_trees
) as maple,
(
count(t.*) filter (
where
t.spc_common ilike '%linden%'
) :: numeric / a.total_trees
) as linden,
(
count(t.*) filter (
where
t.spc_common ilike '%honeylocust%'
) :: numeric / a.total_trees
) as honeylocust,
(
count(t.*) filter (
where
t.spc_common ilike '%oak%'
) :: numeric / a.total_trees
) as oak
-- Joins the above data with data from the original CTE
from
nyc_2015_tree_census t
join nyc_neighborhoods n on st_intersects(t.geom, n.geom)
join a using (neighborhood)
group by
n.neighborhood,
a.total_trees
with a as (
select
*
from
tree_similarity
where
neighborhood = 'Harlem'
)
select
t.neighborhood,
-- Here we subtract the values from our target neighborhood in table "t"
-- which is Harlem, from the average values across the city
t.pine - a.pine as pine_dif,
t.maple - a.maple as maple_dif,
t.oak - a.oak as oak_dif,
t.linden - a.linden as linden_dif,
t.honeylocust - a.honeylocust as honeylocust_dif
from
tree_similarity t,
a
where
t.neighborhood != 'Harlem'
create table harlem_tree_similarity as with a as (
select
*
from
tree_similarity
where
neighborhood = 'Harlem'
),
-- A: Find the difference between Harlem and all other neighborhoods
b as (
select
t.neighborhood,
t.pine - a.pine as pine_dif,
t.maple - a.maple as maple_dif,
t.oak - a.oak as oak_dif,
t.linden - a.linden as linden_dif,
t.honeylocust - a.honeylocust as honeylocust_dif
from
tree_similarity t,
a
where
t.neighborhood != 'Harlem'
),
-- B: Find the min and max values in each column and store it as an array
c as (
select
array [min(pine_dif), max(pine_dif)] as pine,
array [min(maple_dif), max(maple_dif)] as maple,
array [min(oak_dif), max(oak_dif)] as oak,
array [min(linden_dif), max(linden_dif)] as linden,
array [min(honeylocust_dif), max(honeylocust_dif)] as honeylocust
from
b
),
-- C: Find the absolute value of each difference value, normalize the data, and subtract that value from 1
d as (
select
b.neighborhood,
1 - (abs(b.pine_dif) - c.pine [1]) / (c.pine [2] - c.pine [1]) as pine_norm,
1 - (b.maple_dif - c.maple [1]) / (c.maple [2] - c.maple [1]) as maple_norm,
1 - (b.oak_dif - c.oak [1]) / (c.oak [2] - c.oak [1]) as oak_norm,
1 - (b.linden_dif - c.linden [1]) / (c.linden [2] - c.linden [1]) as linden_norm,
1 - (b.honeylocust_dif - c.honeylocust [1]) / (c.honeylocust [2] - c.honeylocust [1]) as honeylocust_norm
from
b,
c
)
-- D: Add up and divide the values
select
neighborhood,
(
pine_norm + maple_norm + oak_norm + linden_norm + honeylocust_norm
) / 5 as final
from
d
order by
2 desc
create table harlem_tree_similarity_geo as
select
s.*,
h.geom
from
harlem_tree_similarity s
join nyc_hoods h using (neighborhood)
alter table
nyc_2015_tree_census
add
column h3 text
update
nyc_2015_tree_census
set
h3 = h3_lat_lng_to_cell(geom, 10)
create table nyc_bgs_h3s as
select
geoid,
h3_polygon_to_cells(geom, 10)
from
nys_2021_census_block_groups
where
left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
select
geoid,
count(*)
from
nyc_bgs_h3s
group by
geoid
order by
count(*) desc
limit
5
-- Get the count of cells in each block group
with a as (
select
geoid,
count(*) as count
from
nyc_bgs_h3s
group by
geoid
order by
count(*) desc
),
-- Join the total population to each H3 cell
b as (
select
h3.geoid,
h3.h3_polygon_to_cells as h3,
c.population as pop
from
nyc_bgs_h3s h3
join nys_2021_census_block_groups c on h3.geoid = c.geoid
),
-- Find the proportional population by dividing the total
-- population by the H3 cell count per block group
c as (
select
b.pop :: numeric / a.count :: numeric as pop,
b.h3,
b.geoid
from
b
join a using (geoid)
),
-- Find the scaled values for each target data point
d as (
select
c.*,
abs(70000 - bgs.income) as income,
abs(35 - bgs.age) as age
from
c
join nys_2021_census_block_groups bgs using (geoid)
),
-- Get the tree count in each cell
e as (
select
d.h3,
count(t.ogc_fid) as trees
from
d
join nyc_2015_tree_census t on d.h3 :: text = t.h3
group by
d.h3
),
-- Add the min and max values for each data point to an array
f as (
select
array [min(trees), max(trees)] as trees_s,
array [min(pop), max(pop)] as pop_s,
array [min(income), max(income)] as income_s,
array [min(age), max(age)] as age_s
from
e
join d on d.h3 = e.h3
),
-- Join the two previous CTEs together
g as (
select
e.trees,
d.age,
d.income,
d.pop,
d.h3
from
d
join e on d.h3 = e.h3
),
-- Calculate the 0 to 1 index
h as (
select
g.h3,
(
(g.trees :: numeric - f.trees_s [1]) /(f.trees_s [2] - f.trees_s [1])
) as trees_i,
1 - ((g.age - f.age_s [1]) /(f.age_s [2] - f.age_s [1])) as age_i,
1 - (
(g.income - f.income_s [1]) /(f.income_s [2] - f.income_s [1])
) as income_i,
((g.pop - f.pop_s [1]) /(f.pop_s [2] - f.pop_s [1])) as pop_i
from
g,
f
)
-- Add up to find the final index value
select
*,
trees_i + age_i + income_i + pop_i
from
h
create table pharmacy_stats as
select
p.id,
p.amenity,
p.brand,
p.name,
p.geom,
-- Add a buffer of 800 meters for later use
st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326) as buffer,
-- Get the total proportional population for each buffer
sum(
bgs.population * (
st_area(
st_intersection(
st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326),
bgs.geom
)
) / st_area(bgs.geom)
)
) as pop
from
nyc_pharmacies p
join nyc_2021_census_block_groups bgs on st_intersects(p.geom, st_transform(bgs.geom, 4326))
-- Get just the stores for CVS and Duane Reade
where
p.name ilike '%duane%'
or p.name ilike '%cvs%'
group by
p.id,
p.amenity,
p.brand,
p.name,
p.geom
select
*
from
nyc_pharmacies
where
name ilike '%duane%'
or name ilike '%cvs%'
-- CTE with all Duane Reade locations
with dr as (
select
id,
amenity,
brand,
name,
geom,
st_transform(geom, 3857) as geom_3857
from
nyc_pharmacies
where
name ilike '%duane%'
),
-- CTE with all CVS locations
cvs as (
select
id,
amenity,
brand,
name,
geom,
st_transform(geom, 3857) as geom_3857
from
nyc_pharmacies
where
name ilike '%cvs%'
),
-- Find all Duane Reade locations within 200 meters of a CVS
remove_nearest as (
select
dr.*
from
dr,
cvs
where
st_dwithin(dr.geom_3857, cvs.geom_3857, 200)
)
select
count(*)
from
remove_nearest
with dr as (
select
*
from
pharmacy_stats
where
name ilike '%duane%'
)
select
dr.*,
-- Find the area of overlap between the two buffer groups
st_area(
st_intersection(pharmacy_stats.buffer, dr.buffer)
) / st_area(dr.buffer)
from
dr
join pharmacy_stats on st_intersects(dr.buffer, pharmacy_stats.buffer)
where
-- Find the number of pharmacies that have an overlap greater than 75%
st_area(
st_intersection(pharmacy_stats.buffer, dr.buffer)
) / st_area(dr.buffer) >.75
and pharmacy_stats.name ilike '%cvs%'
with a as (
select
-- Union all buffers to find the "before" scenario total area
st_union(
st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326)
) as buffer,
st_area(
st_union(st_buffer(st_transform(p.geom, 3857), 800))
) as area
from
nyc_pharmacies p
join nyc_2021_census_block_groups bgs on st_intersects(
st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326),
st_transform(bgs.geom, 4326)
)
where
p.name ilike '%duane%'
or p.name ilike '%cvs%'
)
select
a.*,
-- Find the total populatino of all combined buffers
sum(
bgs.population * (
st_area(st_intersection(a.buffer, bgs.geom)) / st_area(bgs.geom)
)
) as pop
from
a
join nyc_2021_census_block_groups bgs on st_intersects(a.buffer, st_transform(bgs.geom, 4326))
group by
1,
2
create table duane_reade_ids as with overlap as (
with dr as (
select
*
from
pharmacy_stats
where
name ilike '%duane%'
)
-- Find all the Duane Reade locations that have an overlap of over 75%
select
dr.id
from
dr
join pharmacy_stats on st_intersects(dr.buffer, pharmacy_stats.buffer)
where
st_area(
st_intersection(pharmacy_stats.buffer, dr.buffer)
) / st_area(dr.buffer) >.75
and pharmacy_stats.name ilike '%cvs%'
),
-- Select all Duane Reade stores
dr as (
select
id,
amenity,
brand,
name,
geom,
st_transform(geom, 3857) as geom_3857
from
nyc_pharmacies
where
name ilike '%duane%'
),
-- Select all CVS stores
cvs as (
select
id,
amenity,
brand,
name,
geom,
st_transform(geom, 3857) as geom_3857
from
nyc_pharmacies
where
name ilike '%cvs%'
),
-- Remove all the Duane Reade locations within 200 meters of a CVS
remove_nearest as (
select
dr.id
from
dr,
cvs
where
st_dwithin(dr.geom_3857, cvs.geom_3857, 200)
)
select
id
from
remove_nearest
union
select
id
from
overlap
-- We run the same query but we remove IDs not in the table we just created
with p as (
select
*
from
nyc_pharmacies
where
id not in (
select
id
from
duane_reade_ids
)
),
a as (
select
st_union(
st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326)
) as buffer,
st_area(
st_union(st_buffer(st_transform(p.geom, 3857), 800))
) as area
from
nyc_pharmacies p
join nyc_2021_census_block_groups bgs on st_intersects(
st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326),
st_transform(bgs.geom, 4326)
)
where
p.name ilike '%duane%'
or p.name ilike '%cvs%'
)
select
a.*,
sum(
bgs.population * (
st_area(st_intersection(a.buffer, bgs.geom)) / st_area(bgs.geom)
)
) as pop
from
a
join nyc_2021_census_block_groups bgs on st_intersects(a.buffer, st_transform(bgs.geom, 4326))
group by
1,
2
docker run --name mini-postgis -p 35432:5432 --network="host"
-v /Users/mattforrest/Documents/spatial-sql-book/raster:/mnt/mydata
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master
raster2pgsql -s 4269 -I -C -M mnt/mydata/denver-elevation.tif -t 128x128 -F denver_elevation | psql -d gis -h 127.0.0.1 -p 25432 -U docker -W
select
*
from
denver_elevation_full
with c as (
select
(st_contour(rast, 1, 200.00)).*
from
denver_elevation
where
filename = 'denver-elevation.tif'
)
select
st_transform(geom, 4326),
id,
value
from
c
docker run --name mini-postgis -p 35432:5432 --network="host"
-v /Users/mattforrest/Documents/spatial-sql-book/raster:/mnt/mydata
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master
raster2pgsql -s 4269 -I -C -M mnt/mydata/denver-elevation.tif -t 128x128 -F denver_elevation | psql -d gis -h 127.0.0.1 -p 25432 -U docker -W
select
*
from
denver_elevation_full
with c as (
select
(st_contour(rast, 1, 200.00)).*
from
denver_elevation
where
filename = 'denver-elevation.tif'
)
select
st_transform(geom, 4326),
id,
value
from
c
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis port=25432" nys_mesonet_202307.csv \
-dialect sqlite -sql 'select makepoint(cast("longitude [degrees_east]" as numeric), cast("latitude [degrees_north]" as numeric), 4326) AS geom, * from nys_mesonet_202307' \
-nln nys_mesonet_202307 -lco GEOMETRY_NAME=geom
create table nys_mesonet_raster as with inputs as (
select
-- Sets the pixel size to 0.01 decimal degrees since
-- we are using EPSG 4326
0.01 :: float8 as pixelsize,
-- Sets the smoothing algorithim from "gdal_grid"
'invdist:power:5.5:smoothing:2.0' as algorithm,
st_collect(
-- Creates a geometry collection of our data and forces
-- a Z coodrinate of the max temparature
st_force3dz(
geom,
case
when "apparent_temperature_max [degc]" = '' then null
else "apparent_temperature_max [degc]" :: numeric
end
)
) as geom,
-- Expands the grid to add room aroung the edges
st_expand(st_collect(geom), 0.5) as ext
from
nys_mesonet_202307
where
time_end = '2023-07-23 12:00:00 EDT'
),
sizes AS (
SELECT
ceil((ST_XMax(ext) - ST_XMin(ext)) / pixelsize) :: integer AS width,
ceil((ST_YMax(ext) - ST_YMin(ext)) / pixelsize) :: integer AS height,
ST_XMin(ext) AS upperleftx,
ST_YMax(ext) AS upperlefty
FROM
inputs
)
SELECT
-- Sets 1 as the raster ID since we only have one raster
1 AS rid,
ST_InterpolateRaster(
-- The geometry collection that will be used to interpolate to the raster
geom,
-- The algorithim we defined
algorithm,
ST_SetSRID(
-- This creates the band
ST_AddBand(
-- Creates the empty raster with our arguments from the previous CTEs
ST_MakeEmptyRaster(width, height, upperleftx, upperlefty, pixelsize),
-- Sets the default values as a 32 bit float
'32BF'
),
-- The SRID the raster will use
ST_SRID(geom)
)
) AS rast
FROM
sizes,
inputs
create table denver_ele_h3 as
select
h3_lat_lng_to_cell(a.geom, 11) as h3,
avg(val) as avg_ele
from
(
select
r.*
from
denver_elevation,
lateral ST_PixelAsCentroids(rast, 1) as r
) a
group by
1
select
r.*
from
denver_elevation,
lateral ST_PixelAsCentroids(rast, 1) as r
docker run --name mini-postgis -p 35432:5432 --network="host"
-v /Users/mattforrest/Desktop/Desktop/spatial-sql-book/raster:/mnt/mydata
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master
docker container exec -it mini-postgis bash
apt update
apt install osm2pgrouting
apt install osmctools
osmfilter /mnt/mydata/planet_-74.459,40.488_-73.385,41.055.osm --keep="highway=" \
-o=/mnt/mydata/good_ways.osm
docker run --name mini-postgis -p 35432:5432 --network="host"
-v /Users/mattforrest/Desktop/Desktop/spatial-sql-book/raster:/mnt/mydata
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master
docker container exec -it mini-postgis bash
apt update
apt install osm2pgrouting
apt install osmctools
osmfilter /mnt/mydata/planet_-74.459,40.488_-73.385,41.055.osm --keep="highway=" \
-o=/mnt/mydata/good_ways.osm
osm2pgrouting \
-f "/mnt/mydata/good_ways.osm" \
-d routing \
-p 25432 \
-U docker \
-W docker \
--clean
-- Find the source ID closest to the starting point
with start as (
select
source
from
ways
order by
the_geom <-> st_setsrid(
st_geomfromtext('point (-74.244391 40.498995)'),
4326
)
limit
1
),
-- Find the source ID closest to the end point
destination as (
select
source
from
ways
order by
the_geom <-> st_setsrid(
st_geomfromtext('point (-73.902630 40.912329)'),
4326
)
limit
1
)
-- Run our pgRouting query
select
st_union(the_geom) as route
from
pgr_dijkstra(
'select gid as id, source, target, cost,
reverse_cost, st_length(st_transform(the_geom, 3857))
as cost from ways',
(
select
source
from
start
),
(
select
source
from
destination
),
true
) as di
join ways as pt on di.edge = pt.gid;
select
tag_id,
tag_key,
tag_value
from
configuration
order by
tag_id;
create table car_config as
select
*
from
configuration
alter table
car_config
add
column penalty float;
update car_config set penalty=1
update
car_config
set
penalty = -1.0
where
tag_value in ('steps', 'footway', 'pedestrian');
update
car_config
set
penalty = 5
where
tag_value in ('unclassified');
update
car_config
set
penalty = 0.5
where
tag_value in ('tertiary');
update
car_config
set
penalty = 0.3
where
tag_value in (
'primary',
'primary_link',
'trunk',
'trunk_link',
'motorway',
'motorway_junction',
'motorway_link',
'secondary'
);
with start as (
select
source
from
ways
order by
the_geom <-> st_setsrid(
st_geomfromtext('point (-74.244391 40.498995)'),
4326
)
limit
1
), destination as (
select
source
from
ways
order by
the_geom <-> st_setsrid(
st_geomfromtext('point (-73.902630 40.912329)'),
4326
)
limit
1
)
select
st_union(the_geom) as route
from
pgr_dijkstra(
'select
gid as id,
source,
target,
cost_s * penalty as cost,
reverse_cost_s * penalty as reverse_cost,
st_length(st_transform(the_geom, 3857)) as length
from
ways
join car_config using (tag_id)',
(
select
source
from
start
),
(
select
source
from
destination
),
true
) as di
join ways as pt on di.edge = pt.gid;
osmfilter mnt/mydata/planet_-74.459,40.488_-73.385,41.055.osm \
--keep="highway= route= cycleway= bicycle= segregated=" \
-o=mnt/mydata/bike_ways.osm
osm2pgrouting \
-f "mnt/mydata/bike_ways.osm" \
-d bike \
-p 25432 \
-U docker \
-W docker \
--clean
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=bike port=25432" \
nyc_pharmacies.geojson \
-nln nyc_pharmacies -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=bike port=25432" \
Building_Footprints.geojson \
-nln nyc_building_footprints -lco GEOMETRY_NAME=geom
alter table
configuration
add
column penalty float;
update
configuration
set
penalty = 1.0;
update
configuration
set
penalty = 10.0
where
tag_value in ('steps', 'footway', 'pedestrian');
update
configuration
set
penalty = 0.3
where
tag_key in ('cycleway');
update
configuration
set
penalty = 0.3
where
tag_value in ('cycleway', 'bicycle');
update
configuration
set
penalty = 0.7
where
tag_value in ('tertiary', 'residential');
update
configuration
set
penalty = 1
where
tag_value in ('secondary');
update
configuration
set
penalty = 1.2
where
tag_value in ('primary', 'primary_link');
update
configuration
set
penalty = 2
where
tag_value in (
'trunk',
'trunk_link',
'motorway',
'motorway_junction',
'motorway_link'
);
with pharm as (
select
name,
st_centroid(geom) as geom,
id
from
nyc_pharmacies
where
st_dwithin(
st_centroid(geom) :: geography,
st_setsrid(st_makepoint(-73.9700743, 40.6738928), 4326) :: geography,
3000
)
order by
random()
limit
3
), bldgs as (
select
st_centroid(geom) as bldg_geom,
bin
from
nyc_building_footprints
where
st_dwithin(
st_centroid(geom) :: geography,
st_setsrid(st_makepoint(-73.9700743, 40.6738928), 4326) :: geography,
3000
)
order by
random()
limit
10
),
c as (
-- First we select all the columns from the pharm, bldgs, and wid CTEs and subqueries
select
pharm.*,
bldgs.*,
wid.*
from
-- We perform a cross join to find all possible matches between
-- the 5 pharmacies and the 20 buildings
pharm,
bldgs
cross join lateral (
-- For each row find the start and end way IDs
with start as (
select
source
from
ways
order by
the_geom <-> pharm.geom
limit
1
), destination as (
select
source
from
ways
order by
ways.the_geom <-> st_centroid(bldgs.bldg_geom)
limit
1
)
select
start.source as start_id,
destination.source as end_id
from
start,
destination
) wid
)
select
-- For each combination we get the sum of the cost in distance, seconds, and route length
-- and we repeat this for every row, or possible combinatino
sum(di.cost) as cost,
sum(length) as length,
sum(pt.cost_s) as seconds,
st_union(st_transform(the_geom, 4326)) as route
from
pgr_dijkstra(
'select
gid as id,
source,
target,
cost_s * penalty as cost,
reverse_cost_s * penalty as reverse_cost,
st_length(st_transform(the_geom, 3857)) as length
from ways
join configuration using (tag_id)',
array(
select
distinct start_id
from
c
),
array(
select
distinct end_id
from
c
),
true
) as di
join ways as pt on di.edge = pt.gid
group by
start_vid,
end_vid
-- Find 10 random Rite-Aid locations
with a as (
select
*
from
nyc_pharmacies
where
name ilike '%rite%'
limit
10
)
select
-- For each pharmacy we will find 10 random
-- buildings within 800 meters
a.name,
a.id as pharm_id,
a.geom as pharm_geom,
b.*
from
a
cross join lateral (
select
bin as building_id,
geom
from
nyc_building_footprints
where
st_dwithin(
st_centroid(geom) :: geography,
st_centroid(a.geom) :: geography,
800
)
order by
random()
limit
10
) b
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=bike port=25432" \
rite_aid_odm.csv \
-nln rite_aid_odm -lco GEOMETRY_NAME=geom -oo AUTODETECT_TYPE=true
create table rite_aid_tsp as with a as (
select
distinct b.pharm_id as id,
b.geom,
s.source
from
rite_aid_odm b
cross join lateral(
SELECT
source
FROM
ways
ORDER BY
the_geom <-> b.pharm_geom
limit
1
) s
), b as (
select
b.pharm_id as id,
s.source
from
rite_aid_odm b
cross join lateral(
SELECT
source
FROM
ways
ORDER BY
the_geom <-> b.geom
limit
1
) s
), c as (
select
a.id,
a.source,
-- Constructs an array with the way ID of the pharmacy as the first item
array_prepend(a.source, array_agg(distinct b.source)) as destinations
from
a
join b using (id)
-- Will return one row per pharmacy ID
group by
a.id,
a.source
)
select
*
from
c
create table rite_aid_tsp_odm as
-- Select all the values from the table created in the last step
select
a.*,
r.*
from
rite_aid_tsp a
cross join lateral (
select
*
from
-- This will create the cost matrix for each pharmacy
pgr_dijkstracostmatrix(
'select
gid as id,
source,
target,
cost_s * penalty as cost,
reverse_cost_s * penalty as reverse_cost
from ways
join configuration
using (tag_id)',
-- We can use the array to calculate the distances
-- between all locations in the array
(
select
destinations
from
rite_aid_tsp
where
id = a.id
),
directed := false
)
) r
create table solved_tsp as
select
s.id,
s.source,
tsp.*,
lead(tsp.node, 1) over (
partition by s.source
order by
tsp.seq
) as next_node
from
rite_aid_tsp s
cross join lateral (
select
*
from
pgr_TSP(
$$
select
*
from
rite_aid_tsp_odm
where
source = $$ || s.source || $$$$
)
) tsp
create table final_tsp_test as
with a as (
select
s.id,
s.source,
s.seq,
s.node,
s.next_node,
di.*,
ways.the_geom,
st_length(st_transform(ways.the_geom, 3857)) as length
from
solved_tsp s,
-- We cross join this to each row
pgr_dijkstra(
'select
gid as id,
source,
target,
cost_s,
cost_s * penalty as cost,
reverse_cost_s * penalty as reverse_cost
from ways
join configuration
using (tag_id)',
-- We create a route between the current node and the next node
s.node,
s.next_node,
true
) as di
join ways on di.node = ways.source
)
select
-- Union the geometries and find the sum of the cost and length for each route
st_union(st_transform(the_geom, 4326)) as route,
source,
sum(cost) as cost,
sum(length) as length
from
a
group by
source
with start as (
select
source
from
ways
join car_config using (tag_id)
where
car_config.penalty != -1
order by
the_geom <-> st_setsrid(
st_geomfromtext('POINT (-73.987374 40.722349)'),
4326
)
limit
1
), b as (
select
*
from
pgr_drivingdistance(
'select
gid as id,
source,
target,
cost_s * 2.5 as cost,
reverse_cost_s * 2.5 as reverse_cost
from ways
join car_config
using (tag_id)
where tag_id in (110, 100, 124, 115, 112, 125, 109, 101,
103, 102, 106, 107, 108, 104, 105)',
(
select
source
from
start
),
600
)
)
-- Union the geometries into a single geometry
select
st_union(ways.the_geom)
from
ways
where
gid in (select edge from b)
with start as (
select
source
from
ways
join car_config using (tag_id)
where
car_config.penalty != -1
order by
the_geom <-> st_setsrid(
st_geomfromtext('POINT (-73.987374 40.722349)'),
4326
)
limit
1
), b as (
select
*
from
pgr_drivingdistance(
'select
gid as id,
source,
target,
cost_s * 2.5 as cost,
reverse_cost_s * 2.5 as reverse_cost
from ways
join car_config
using (tag_id)
where tag_id in (110, 100, 124, 115, 112, 125,
109, 101, 103, 102, 106, 107, 108, 104, 105)',
(
select
source
from
start
),
600
)
)
select
-- Creates a concave hull around the entire routes geometry
st_concavehull(
st_transform(
st_union(the_geom), 4326),
0.1) as geom
from
ways
where
gid in (
select
distinct edge
from
b
)
with start as (
select
source
from
ways
join car_config using (tag_id)
where
car_config.penalty != -1
order by
the_geom <-> st_setsrid(
st_geomfromtext('POINT (-73.987374 40.722349)'),
4326
)
limit
1
), b as (
select
*
from
pgr_drivingdistance(
'select gid as id,
source,
target,
cost_s * 2.5 as cost,
reverse_cost_s * 2.5 as reverse_cost
from ways
join car_config
using (tag_id)
where tag_id in (110, 100, 124, 115, 112,
125, 109, 101, 103, 102, 106, 107, 108, 104, 105)',
(
select
source
from
start
),
600
)
)
select
-- Turn it all into a polygon
st_makepolygon(
-- Find the exterior ring which will eliminate islands
st_exteriorring(
-- Create a 20 meter buffer
st_buffer(
st_transform(
st_union(the_geom), 4326) :: geography,
20
) :: geometry
)
) as geom
from
ways
where
gid in (
select
distinct edge
from
b
)
with start as (
select
source
from
ways
join configuration using (tag_id)
where
configuration.penalty <= 1
order by
the_geom <-> st_setsrid(
st_geomfromtext('POINT (-73.987374 40.722349)'),
4326
)
limit
1
), b as (
select
*
from
pgr_drivingdistance(
'select
gid as id,
source,
target,
cost_s * 2.5 as cost,
reverse_cost_s * 2.5 as reverse_cost
from ways
join configuration
using (tag_id)
where penalty <= 1',
(
select
source
from
start
),
600
)
)
select
st_makepolygon(
st_exteriorring(
st_buffer(
st_transform(st_union(the_geom), 4326) :: geography,
20
) :: geometry
)
) as geom
from
ways
where
gid in (
select
distinct edge
from
b
)
CREATE EXTENSION plpython3u;
docker container exec -it docker-postgis-db-1 bash
apt update
apt install python3-pip
python3 --version
pip3 --version
apt-get install gdal-bin
apt-get install libgdal-dev
pip3 install esda matplotlib libpysal spopt geopandas scikit-learn --break-system-packages
CREATE FUNCTION pymax (a integer, b integer)
RETURNS integer
AS $$
if a > b:
return a
return b
$$
LANGUAGE plpython3u;
select
pymax(100, 1)
create function python_limit_5 (tablename TEXT)
returns text
AS $$
data = plpy.execute(f'''select * from {tablename} limit 5''')
return data
$$
LANGUAGE plpython3u;
create table nyc_neighborhoods_no_geom as
select
ogc_fid,
neighborhood,
boroughcode,
borough
from
nyc_neighborhoods
create or replace function python_limit_5 (tablename TEXT)
returns text
AS $$
data = plpy.execute(f'''select * from {tablename} limit 5''')
return data[0]
$$
LANGUAGE plpython3u;
create or replace function python_limit_5 (tablename TEXT)
returns text
AS $$
data = plpy.execute(f'''select * from {tablename} limit 5''')
return data[0]['neighborhood']
$$
LANGUAGE plpython3u;
create
or replace function pysal_esda_test(
tablename TEXT,
geom TEXT,
col TEXT,
id_col TEXT,
similarity FLOAT
)
returns text
AS $$
import esda
import libpysal as lps
from libpysal.weights import W
import json
import numpy as np
import pandas as pd
neighbors = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.neighbors) as neighbors
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col } :: text) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
weights = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.weights) as weights
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
return neighbors
$$
LANGUAGE 'plpython3u';
create
or replace function pysal_esda_test(
tablename TEXT,
geom TEXT,
col TEXT,
id_col TEXT,
similarity FLOAT
)
returns text
AS $$
import esda
import libpysal as lps
from libpysal.weights import W
import json
import numpy as np
import pandas as pd
neighbors = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.neighbors) as neighbors
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col } :: text) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
weights = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.weights) as weights
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
w = W(json.loads(neighbors [0] ['neighbors']), json.loads(weights [0] ['weights']), silence_warnings = True)
w.transform = 'r'
var_data = plpy.execute(
f '''
with a as (
select
distinct b.{ col },
b.{ id_col }
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
)
select
{ col } as data_col
from
a
''')
var_list = []
for i in var_data:
if i ['data_col'] == None:
var_list.append(float(0.0))
else:
var_list.append(float(i ['data_col']))
li = esda.moran.Moran_Local(np.array(var_list), w)
return var_list
$$
LANGUAGE 'plpython3u';
create or replace function pysal_esda (
tablename TEXT,
geom TEXT,
col TEXT,
id_col TEXT,
similarity FLOAT
)
returns table (
id TEXT,
col FLOAT,
local_morani_values FLOAT,
p_values FLOAT,
quadrant TEXT,
geom GEOMETRY
)
AS $$
import esda
import libpysal as lps
from libpysal.weights import W
import json
import numpy as np
import pandas as pd
neighbors = plpy.execute(
f'''
select
json_object_agg(b.{id_col}, a.neighbors) as neighbors
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col} :: text) as neighbors
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
'''
)
weights = plpy.execute(
f'''
select
json_object_agg(b.{id_col}, a.weights) as weights
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col}) as neighbors,
array_fill(
(
case
when count(z.{id_col}) = 0 then 0
else 1 / count(z.{id_col}) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
'''
)
w = W(json.loads(neighbors[0]['neighbors']), json.loads(weights[0]['weights']), silence_warnings = True)
w.transform = 'r'
var_data = plpy.execute(
f'''
with a as (
select
distinct b.{col},
b.{id_col}
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col}) as neighbors
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
)
select
{col} as data_col
from
a
''')
var_list = []
for i in var_data:
if i['data_col'] == None:
var_list.append(float(0.0))
else:
var_list.append(float(i['data_col']))
li = esda.moran.Moran_Local(np.array(var_list), w)
original = plpy.execute(f'''
with a as (
select
distinct b.{col},
b.{id_col},
b.{geom}
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col}) as neighbors
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
)
select
{id_col},
{col},
{geom}
from
a'''
)
original_data = []
lookup_table = []
for i in original:
original_data.append(i)
lookup_table.append(i[f'{id_col}'])
df = pd.DataFrame.from_dict(original_data)
formatted_data = []
for i in original_data:
dict = i
res = lookup_table.index(i[f'{id_col}'])
dict['local_morani_values'] = li.Is[res]
dict['p_values'] = li.p_sim[res]
dict['quadrant'] = li.q[res]
formatted_data.append(dict)
original_data_df = pd.DataFrame.from_dict(formatted_data)
final = df.merge(original_data_df, how='inner', on=f'{id_col}')
final_df = final.drop([f'{col}_x', f'{geom}_x'], axis=1)
final_df = final_df.rename(columns = {f'{col}_y': 'col', f'{geom}_y': f'{geom}', f'{id_col}': 'id'})
return final_df.to_dict(orient = 'records')
$$
LANGUAGE 'plpython3u';
create table nyc_2021_census_block_groups_morans_i as
select
*
from
nys_2021_census_block_groups
where
left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
and population > 0
create table nyc_bgs_esda as
select
*
from
pysal_esda(
'nyc_2021_census_block_groups_morans_i',
'geom',
'income',
'geoid',
0.05
)
CREATE EXTENSION plpython3u;
docker container exec -it docker-postgis-db-1 bash
apt update
apt install python3-pip
python3 --version
pip3 --version
apt-get install gdal-bin
apt-get install libgdal-dev
pip3 install esda matplotlib libpysal spopt geopandas scikit-learn --break-system-packages
CREATE FUNCTION pymax (a integer, b integer)
RETURNS integer
AS $$
if a > b:
return a
return b
$$
LANGUAGE plpython3u;
select
pymax(100, 1)
create function python_limit_5 (tablename TEXT)
returns text
AS $$
data = plpy.execute(f'''select * from {tablename} limit 5''')
return data
$$
LANGUAGE plpython3u;
create table nyc_neighborhoods_no_geom as
select
ogc_fid,
neighborhood,
boroughcode,
borough
from
nyc_neighborhoods
create or replace function python_limit_5 (tablename TEXT)
returns text
AS $$
data = plpy.execute(f'''select * from {tablename} limit 5''')
return data[0]
$$
LANGUAGE plpython3u;
create or replace function python_limit_5 (tablename TEXT)
returns text
AS $$
data = plpy.execute(f'''select * from {tablename} limit 5''')
return data[0]['neighborhood']
$$
LANGUAGE plpython3u;
create
or replace function pysal_esda_test(
tablename TEXT,
geom TEXT,
col TEXT,
id_col TEXT,
similarity FLOAT
)
returns text
AS $$
import esda
import libpysal as lps
from libpysal.weights import W
import json
import numpy as np
import pandas as pd
neighbors = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.neighbors) as neighbors
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col } :: text) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
weights = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.weights) as weights
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
return neighbors
$$
LANGUAGE 'plpython3u';
create
or replace function pysal_esda_test(
tablename TEXT,
geom TEXT,
col TEXT,
id_col TEXT,
similarity FLOAT
)
returns text
AS $$
import esda
import libpysal as lps
from libpysal.weights import W
import json
import numpy as np
import pandas as pd
neighbors = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.neighbors) as neighbors
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col } :: text) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
weights = plpy.execute(
f '''
select
json_object_agg(b.{ id_col }, a.weights) as weights
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
'''
)
w = W(json.loads(neighbors [0] ['neighbors']), json.loads(weights [0] ['weights']), silence_warnings = True)
w.transform = 'r'
var_data = plpy.execute(
f '''
with a as (
select
distinct b.{ col },
b.{ id_col }
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geom }, b.{ geom })
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
)
select
{ col } as data_col
from
a
''')
var_list = []
for i in var_data:
if i ['data_col'] == None:
var_list.append(float(0.0))
else:
var_list.append(float(i ['data_col']))
li = esda.moran.Moran_Local(np.array(var_list), w)
return var_list
$$
LANGUAGE 'plpython3u';
create or replace function pysal_esda (
tablename TEXT,
geom TEXT,
col TEXT,
id_col TEXT,
similarity FLOAT
)
returns table (
id TEXT,
col FLOAT,
local_morani_values FLOAT,
p_values FLOAT,
quadrant TEXT,
geom GEOMETRY
)
AS $$
import esda
import libpysal as lps
from libpysal.weights import W
import json
import numpy as np
import pandas as pd
neighbors = plpy.execute(
f'''
select
json_object_agg(b.{id_col}, a.neighbors) as neighbors
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col} :: text) as neighbors
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
'''
)
weights = plpy.execute(
f'''
select
json_object_agg(b.{id_col}, a.weights) as weights
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col}) as neighbors,
array_fill(
(
case
when count(z.{id_col}) = 0 then 0
else 1 / count(z.{id_col}) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
'''
)
w = W(json.loads(neighbors[0]['neighbors']), json.loads(weights[0]['weights']), silence_warnings = True)
w.transform = 'r'
var_data = plpy.execute(
f'''
with a as (
select
distinct b.{col},
b.{id_col}
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col}) as neighbors
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
)
select
{col} as data_col
from
a
''')
var_list = []
for i in var_data:
if i['data_col'] == None:
var_list.append(float(0.0))
else:
var_list.append(float(i['data_col']))
li = esda.moran.Moran_Local(np.array(var_list), w)
original = plpy.execute(f'''
with a as (
select
distinct b.{col},
b.{id_col},
b.{geom}
from
{tablename} b
cross join lateral (
select
array_agg(z.{id_col}) as neighbors
from
{tablename} z
where
st_intersects(z.{geom}, b.{geom})
and z.{id_col} != b.{id_col}
) a
where
a.neighbors is not null
)
select
{id_col},
{col},
{geom}
from
a'''
)
original_data = []
lookup_table = []
for i in original:
original_data.append(i)
lookup_table.append(i[f'{id_col}'])
df = pd.DataFrame.from_dict(original_data)
formatted_data = []
for i in original_data:
dict = i
res = lookup_table.index(i[f'{id_col}'])
dict['local_morani_values'] = li.Is[res]
dict['p_values'] = li.p_sim[res]
dict['quadrant'] = li.q[res]
formatted_data.append(dict)
original_data_df = pd.DataFrame.from_dict(formatted_data)
final = df.merge(original_data_df, how='inner', on=f'{id_col}')
final_df = final.drop([f'{col}_x', f'{geom}_x'], axis=1)
final_df = final_df.rename(columns = {f'{col}_y': 'col', f'{geom}_y': f'{geom}', f'{id_col}': 'id'})
return final_df.to_dict(orient = 'records')
$$
LANGUAGE 'plpython3u';
create table nyc_2021_census_block_groups_morans_i as
select
*
from
nys_2021_census_block_groups
where
left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
and population > 0
create table nyc_bgs_esda as
select
*
from
pysal_esda(
'nyc_2021_census_block_groups_morans_i',
'geom',
'income',
'geoid',
0.05
)
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=routing port=25432" \
FDNY_Firehouse_Listing.csv \
-dialect sqlite -sql \
"select *, makepoint(cast(Longitude as float), cast(Latitude as float), 4326) as geom from FDNY_Firehouse_Listing" \
-nln nyc_fire_stations -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=routing port=25432" nyc_mappluto_22v3_shp/MapPLUTO.shp \
-nln building_footprints -lco GEOMETRY_NAME=geom \
-nlt MULTIPOLYGON -mapFieldType Real=String
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=routing port=25432" \
-dialect SQLite -sql "SELECT neighborhood, boroughcode, borough, \
ST_Area(st_collect(st_transform(geometry, 3857))) AS area, \
st_collect(geometry) as geom FROM nyc_hoods \
group by neighborhood, boroughcode, borough" \
nyc_hoods.geojson \
-nln nyc_neighborhoods -lco GEOMETRY_NAME=geom -nlt PROMOTE_TO_MULTI
create table new_stations (name text, geom geometry)
insert into
new_stations (name, geom)
values
(
'Withers and Woodpoint',
st_setsrid(st_makepoint(-73.941455, 40.717628), 4326)
),
(
'Grand and Graham',
st_setsrid(st_makepoint(-73.943791, 40.711584), 4326)
),
(
'Berry and N 11th',
st_setsrid(st_makepoint(-73.9566498, 40.7207395), 4326)
)
create table all_stations as
select
facilityname as name,
geom
from
nyc_fire_stations
where
-- Finding all the stations in our three target neighborhoods
st_intersects(
geom,
(
select
st_union(st_transform(geom, 4326))
from
nyc_neighborhoods
where
neighborhood in (
'Williamsburg',
'East Williamsburg',
'Greenpoint'
)
)
)
union
select
*
from
new_stations
create table bk_blgds as
select
*
from
building_footprints
where
st_intersects(
st_transform(geom, 4326),
-- Find all the buildings that are in the three neighborhoods
-- and within 100 meters
(
select
st_buffer(st_union(geom) :: geography, 100) :: geometry
from
nyc_neighborhoods
where
neighborhood in (
'Williamsburg',
'East Williamsburg',
'Greenpoint'
)
)
)
create table fire_odm as
with starts as (
-- Aggregate the source way ID into an array for
-- the closest way ID to each station
select
array_agg(z.source)
from
all_stations
cross join lateral (
select
source
from
ways
join car_config c using (tag_id)
where
-- Here we are picking all the way tags that
-- are not in this list
c.tag_value not in (
'track',
'bridleway',
'bus_guideway',
'byway',
'cycleway',
'path',
'track',
'grade1',
'grade2',
'grade3',
'grade4',
'grade5',
'unclassified',
'footway',
'pedestrian',
'steps'
)
order by
the_geom <-> all_stations.geom
limit
1
) z
),
-- Here we do the same with the destinations for the buildings
destinations as (
select
array_agg(z.source)
from
bk_blgds
cross join lateral (
select
source
from
ways
join car_config c using (tag_id)
where
c.tag_value not in (
'track',
'bridleway',
'bus_guideway',
'byway',
'cycleway',
'path',
'track',
'grade1',
'grade2',
'grade3',
'grade4',
'grade5',
'unclassified',
'footway',
'pedestrian',
'steps'
)
order by
ways.the_geom <-> st_transform(st_centroid(bk_blgds.geom), 4326)
limit
1
) z
-- We only select the buildings in the neighborhood boundaries
where
st_intersects(
st_transform(geom, 4326),
(
select
st_buffer(st_union(geom) :: geography, 100) :: geometry
from
nyc_neighborhoods
where
neighborhood in (
'williamsburg',
'east williamsburg',
'greenpoint'
)
)
)
)
select
*
from
-- Here we pass these arguments to the pgr_bdDijkstraCost function
pgr_bddijkstracost(
$$
select
id,
source,
target,
cost_s as cost,
reverse_cost_s as reverse_cost
from
ways
join car_config using (tag_id)
where
st_intersects(
st_transform(the_geom, 4326),
(
select
st_buffer(st_union(geom) :: geography, 100) :: geometry
from
nyc_neighborhoods
where
neighborhood in (
'williamsburg',
'east williamsburg',
'greenpoint'
)
)
) $$,
-- We pass the arrays from above as the two arguments
(
select
*
from
starts
),
(
select
*
from
destinations
),
true
);
create
or replace function spopt_pmedian(
odm_table TEXT,
optimal_facilities INT,
clients_table TEXT,
facilities_table TEXT
)
returns table (
facility TEXT,
end_vid INT,
ogc_fid INT,
geom GEOMETRY
) AS $$
import spopt
from spopt.locate import PMedian
import numpy
import pulp
import pandas as pd
odm = plpy.execute(f''' select * from {odm_table} ''')
odm_new = []
for i in odm:
odm_new.append({'start_vid': i['start_vid'], 'end_vid': i['end_vid'], 'agg_cost': i['agg_cost']})
solver = pulp.PULP_CBC_CMD(keepFiles=True)
df = pd.DataFrame.from_dict(odm_new)
data = df.pivot_table(index='end_vid', columns='start_vid', values='agg_cost', fill_value=1000000).values
vals = df.pivot_table(index='end_vid', columns='start_vid', values='agg_cost', fill_value=1000000)
clients = data.shape[0]
weights = [1] * clients
pmedian_from_cm = PMedian.from_cost_matrix(
numpy.array(data),
numpy.array(weights),
p_facilities=int(optimal_facilities),
name="p-median-network-distance" )
pmedian_from_cm = pmedian_from_cm.solve(solver)
station_ids = plpy.execute(f'''
select
z.source as end_vid,
{facilities_table}.name
from
{facilities_table}
cross join lateral (
SELECT
source
FROM
ways
join configuration c using (tag_id)
where
c.tag_value not in (
'track',
'bridleway',
'bus_guideway',
'byway',
'cycleway',
'path',
'track',
'grade1',
'grade2',
'grade3',
'grade4',
'grade5',
'unclassified',
'footway',
'pedestrian',
'steps'
)
ORDER BY
ways.the_geom <-> st_transform(st_centroid({facilities_table}.geom), 4326)
limit
1
) z
''')
stations = []
for i in station_ids:
stations.append(i)
stations_df = pd.DataFrame.from_dict(stations)
cleaned_points = []
for i in pmedian_from_cm.fac2cli:
if len(i) > 0:
z = stations_df[stations_df['end_vid'] == vals.columns[pmedian_from_cm.fac2cli.index(i)]]['name'].values[0]
for j in i:
struct = {'facility': z, 'end_vid': list(vals.index)[j]}
cleaned_points.append(struct)
df_startids = pd.DataFrame.from_dict(cleaned_points)
orig_data = plpy.execute(f'''
select
z.source as end_vid,
{clients_table}.ogc_fid,
st_transform(st_centroid({clients_table}.geom), 4326) as geom
from
{clients_table}
cross join lateral (
SELECT
source
FROM
ways
join car_config c using (tag_id)
where
c.tag_value not in (
'track',
'bridleway',
'bus_guideway',
'byway',
'cycleway',
'path',
'track',
'grade1',
'grade2',
'grade3',
'grade4',
'grade5',
'unclassified',
'footway',
'pedestrian',
'steps'
)
ORDER BY
ways.the_geom <-> st_transform(st_centroid({clients_table}.geom), 4326)
limit
1
) z
''')
orig_formatted = []
for i in orig_data:
orig_formatted.append(i)
orig_df = pd.DataFrame.from_dict(orig_formatted)
final_df = orig_df.merge(df_startids, how='left', on='end_vid')
final_df = final_df.replace(numpy.nan, None)
return final_df.to_dict(orient='records')
$$
LANGUAGE 'plpython3u';
create table bk_final as
select
*
from
spopt_pmedian('fire_odm', 5, 'bk_blgds', 'all_stations');
create table final_stations as
select
*
from
all_stations
where
name in (
select
distinct facility
from
bk_final
)
create
or replace function pysal_skater(
tablename TEXT,
geometry TEXT,
col TEXT,
id_col TEXT,
n_clusters INT,
floor INT
) returns table (id TEXT, col FLOAT, regions INT, geometry TEXT)
AS $$
import spopt
import libpysal as lps
import json
import numpy as np
import pandas as pd
import geopandas as gpd
from sklearn.metrics import pairwise as skm
import numpy
from shapely import wkt
neighbors = plpy.execute(f'''
select
json_object_agg(b.{ id_col }, a.neighbors) as neighbors
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col } :: text) as neighbors
from
{ tablename } z
where
st_intersects(z.{ geometry }, b.{ geometry })
and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
''')
weights = plpy.execute(f'''
select
json_object_agg(b.{ id_col }, a.weights) as weights
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geometry }, b.{ geometry })
and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
''')
ids = []
for i in json.loads(weights[0]['weights']):
ids.append(i)
w = lps.weights.W(json.loads(neighbors[0]['neighbors']), json.loads(weights[0]['weights']), silence_warnings = True, id_order=ids)
w.transform='r'
to_gdf = plpy.execute(f'''
select
st_astext(st_transform({ geometry }, 4326)) as geometry,
{ col } :: numeric as col,
{ id_col } as id
from
{ tablename } b
cross join lateral (
select
array_agg(z.{ id_col }) as neighbors,
array_fill(
(
case
when count(z.{ id_col }) = 0 then 0
else 1 / count(z.{ id_col }) :: numeric
end
),
array [count(z.{id_col})::int]
) as weights
from
{ tablename } z
where
st_intersects(z.{ geometry }, b.{ geometry })
and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
and z.{ id_col } != b.{ id_col }
) a
where
a.neighbors is not null
''')
gdf_data = []
for i in to_gdf:
gdf_data.append(i)
gdf = gpd.GeoDataFrame(gdf_data)
spanning_forest_kwds = dict(
dissimilarity=skm.manhattan_distances,
affinity=None,
reduction=numpy.sum,
center=numpy.mean,
verbose=False
)
model = spopt.region.Skater(
gdf,
w,
['col'],
n_clusters=n_clusters,
floor=floor,
trace=False,
islands='increase',
spanning_forest_kwds=spanning_forest_kwds
)
model.solve()
gdf['regions'] = model.labels_
return gdf.to_dict(orient = 'records')
$$
LANGUAGE 'plpython3u';
create table bklyn_bgs as
select
*
from
nyc_2021_census_block_groups
where
left(right(geoid, 10), 3) = '047'
create table brklyn_bgs_skater as
select
*,
st_geomfromtext(geometry) as geom
from
pysal_skater('bklyn_bgs', 'geom', 'population', 'geoid', 8, 250)