3 - Setting up

3.4 Installing docker-postgis

3.1

select
    *
from
    cb_2018_us_county_500k
where
    statefp = '55'

3.2

select
    id,
    st_centroid(geom) as geom
from
    cb_2018_us_county_500k
where
    statefp = '55'

3.3

create
or relace view wi_centroids AS
select
    id,
    st_centroid(geom) as geom
from
    cb_2018_us_county_500k
where
    statefp = '55'

3 - Setting up

3.4 Installing docker-postgis

3.1

select
    *
from
    cb_2018_us_county_500k
where
    statefp = '55'

3.2

select
    id,
    st_centroid(geom) as geom
from
    cb_2018_us_county_500k
where
    statefp = '55'

3.3

create
or relace view wi_centroids AS
select
    id,
    st_centroid(geom) as geom
from
    cb_2018_us_county_500k
where
    statefp = '55'

4 - Thinking in SQL

4.3 Database organization and design

4.1

select
  name,

  -- Use the length() function to find the number of letters, or length of the name
  length(name),

  -- Use the pg_column_size() function to find the size of the column in bytes
  pg_column_size(name),

  -- Here we are doing a few things
  -- 1. Getting the size of the geom column in bytes using ST_MemSize()
  -- 2. Casting the result of ST_MemSize() to a numeric value since 
  --    ST_MemSize() to format it as a number implicitly
  -- 3. Use pg_size_pretty() to format the bytes in human readable sizes like kb or mb
  pg_size_pretty(st_memsize(geom) :: numeric)
from
  cb_2018_us_county_500k 

  -- Using order by to order the results of ST_MemSize() 
  -- from largest to smallest, or descending using desc
order by
  st_memsize(geom) desc

4.2

select
  name,

  -- Calculate the total number of points in the geometry using ST_NPoints()
  st_npoints(geom) as n_points,

  -- Calculate the size of the geometry using ST_MemSize()
  st_memsize(geom) as size_bytes,
  
  -- Using the formula we saw, calculate the size of the geometry as: 40 + ( no. of points * 16)
  40 + (16 * st_npoints(geom)) as calculated_size
from
  cb_2018_us_county_500k
order by
  st_memsize(geom) desc

4.3

select
  name,

  -- Use ST_GeometryType to see our geometry type
  st_geometrytype(geom) as geom_type,
  st_memsize(geom) as size_bytes,

  -- Use ST_NumGeometries to see our geometry type
  st_numgeometries(geom) as num_geoms,
  
  -- Try and calculate using the formula (no. of geometries * 40) + (no. of points * 16)
  (st_numgeometries(geom) * 40) + (16 * st_npoints(geom)) as calculated_size
from
  cb_2018_us_county_500k
order by
  st_memsize(geom) desc

4.4

select
  name,
  st_geometrytype(geom) as geom_type,
  st_memsize(geom) as size_bytes,
  st_numgeometries(geom) as num_geoms,

  -- Try and calculate using the formula 32 + ((no. of geometries * 16) + (no. of points * 16))
  
  32 + (
    (st_numgeometries(geom) * 16) + (16 * st_npoints(geom))
  ) as calculated_size
from
  cb_2018_us_county_500k
order by
  st_memsize(geom) desc

4.5

select
    pc.geom,
    pc.postal_code,
    count(customers.customer_id)
from
    postal_codes pc
    join customers using (postal_code)
group by
    pc.postal_code

4.6

select
    *
from
    street_centerlines_current
where
    date_added between '01-01-2022'
    and '06-30-2022'

4 - Thinking in SQL

4.3 Database organization and design

4.1

select
  name,

  -- Use the length() function to find the number of letters, or length of the name
  length(name),

  -- Use the pg_column_size() function to find the size of the column in bytes
  pg_column_size(name),

  -- Here we are doing a few things
  -- 1. Getting the size of the geom column in bytes using ST_MemSize()
  -- 2. Casting the result of ST_MemSize() to a numeric value since 
  --    ST_MemSize() to format it as a number implicitly
  -- 3. Use pg_size_pretty() to format the bytes in human readable sizes like kb or mb
  pg_size_pretty(st_memsize(geom) :: numeric)
from
  cb_2018_us_county_500k 

  -- Using order by to order the results of ST_MemSize() 
  -- from largest to smallest, or descending using desc
order by
  st_memsize(geom) desc

4.2

select
  name,

  -- Calculate the total number of points in the geometry using ST_NPoints()
  st_npoints(geom) as n_points,

  -- Calculate the size of the geometry using ST_MemSize()
  st_memsize(geom) as size_bytes,
  
  -- Using the formula we saw, calculate the size of the geometry as: 40 + ( no. of points * 16)
  40 + (16 * st_npoints(geom)) as calculated_size
from
  cb_2018_us_county_500k
order by
  st_memsize(geom) desc

4.3

select
  name,

  -- Use ST_GeometryType to see our geometry type
  st_geometrytype(geom) as geom_type,
  st_memsize(geom) as size_bytes,

  -- Use ST_NumGeometries to see our geometry type
  st_numgeometries(geom) as num_geoms,
  
  -- Try and calculate using the formula (no. of geometries * 40) + (no. of points * 16)
  (st_numgeometries(geom) * 40) + (16 * st_npoints(geom)) as calculated_size
from
  cb_2018_us_county_500k
order by
  st_memsize(geom) desc

4.4

select
  name,
  st_geometrytype(geom) as geom_type,
  st_memsize(geom) as size_bytes,
  st_numgeometries(geom) as num_geoms,

  -- Try and calculate using the formula 32 + ((no. of geometries * 16) + (no. of points * 16))
  
  32 + (
    (st_numgeometries(geom) * 16) + (16 * st_npoints(geom))
  ) as calculated_size
from
  cb_2018_us_county_500k
order by
  st_memsize(geom) desc

4.5

select
    pc.geom,
    pc.postal_code,
    count(customers.customer_id)
from
    postal_codes pc
    join customers using (postal_code)
group by
    pc.postal_code

4.6

select
    *
from
    street_centerlines_current
where
    date_added between '01-01-2022'
    and '06-30-2022'

4.5 Projections

4.7

insert into
    spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext)
values
    (
        104726,
        'ESRI',
        104726,
        '+proj=longlat +a=6378418.941 +rf=298.257222100883 +no_defs +type=crs',
        'GEOGCS["GCS_NAD_1983_HARN_Adj_MN_Hennepin",DATUM["D_NAD_1983_HARN_Adj_MN_Hennepin",SPHEROID["S_GRS_1980_Adj_MN_Hennepin",6378418.941,298.257222100883,AUTHORITY["ESRI","107726"]],AUTHORITY["ESRI","106726"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["ESRI","104726"]]'
    );

4.8

create view mn_104726 as
select
  id,
  st_transform(geom, 104726)
from
  cb_2018_us_county_500k
where
  statefp = '27'

4.6 Thinking in SQL

4.9

-- First we select your data from the ZIP codes table 
-- and aggregate or count the total number of records 
-- from the NYC 311 data
select
  zips.zipcode,
  zips.geom,
  count(nyc_311.*) as count
  
-- Then we join the tables on a common ID, in this case the ZIP code
from
  nyc_zips zips
  join nyc_311 on nyc_311.incident_zip = zips.zipcode 
  
-- Then we filter using WHERE to the right complaint type 
-- and group the results by the ZIP code and geometry
where
  nyc_311.complaint_type = 'Illegal Parking'
group by
  zips.zipcode,
  zips.geom

4.7 Optimizing our queries and other tips

4.10

select
    zips.zipcode,
    zips.geom,
    count(nyc_311.id) as count
from
    nyc_zips zips
    join nyc_311 on nyc_311.incident_zip = zips.zipcode
where
    nyc_311.complaint_type = 'Illegal Parking'
group by
    zips.zipcode,
    zips.geom

4.11

-- In this CTE, which has an alias of "a", 
-- we pull the data we need from the nyc_311 data 
-- and filter to just the results that match "Illegal Parking"
with a as (
  select
    id,
    incident_zip as zipcode
  from
    nyc_311
  where
    nyc_311.complaint_type = 'Illegal Parking'
) 

-- We then join the data from our "temporary table" a to the zipcode data
select
  zips.zipcode,
  zips.geom,
  count(a.id) as count
from
  nyc_zips zips
  join a using (zipcode)
group by
  zips.zipcode,
  zips.geom

4.12

-- Now we have our entire aggregation in the CTE
with a as (
    select
        count("unique key") as total,
        "incident zip" as zipcode
    from
        nyc_311
    where
        nyc_311."complaint type" = 'Illegal Parking'
    group by
        "incident zip"
)
select
    zips.zipcode,
    zips.geom,
    a.total
from
    nyc_zips zips
    join a using (zipcode)

4.8 Using pseudo-code and "rubber ducking"

4.13

select
    *
from
    nyc_311
where
    DATE_PART('month', "created date") = 7
    and "complaint type" = 'Illegal Fireworks'

4.14

select
    *
from
    nyc_311
where
    date_part('month', "created date" :: date) = 7
    and "complaint type" = 'Illegal Fireworks'

5 - SQL Basics

5.2 ogr2ogr

5.1

ogr2ogr \
    -f PostgreSQL PG:"host=localhost user=user password=password \
    dbname=geo" /Users/matt/Documents/spatial-sql-book/nyc_taxi_yellow_0616-07.parquet \
    -nln nyc_taxi_yellow_0616 -lco GEOMETRY_NAME=geom

5.2

docker run --rm -v /Users:/Users --network="host" osgeo/gdal 
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" \
/Users/mattforrest/Documents/spatial-sql-data/nyc_yellow_taxi_0601_0615_2016.parquet \
-nln nyc_yellow_taxi_0601_0615_2016 -lco GEOMETRY_NAME=geom

5.3

create table nyc_311 (
   id int primary key,
   created_date timestamp,
   closed_date timestamp,
   agency text,
   agency_name text,
   complaint_type text,
   descriptor text,
   location_type text,
   incident_zip text,
   incident_address text,
   street_name text,
   cross_street_1 text,
   cross_street_2 text,
   intersection_street_1 text,
   intersection_street_2 text,
   address_type text,
   city text,
   landmark text,
   facility_type text,
   status text,
   due_date timestamp,
   resolution_description text,
   resolution_action_updated_date timestamp,
   community_board text,
   bbl text,
   borough text,
   x_coordinate_planar numeric,
   y_coordinate_planar numeric,
   open_data_channel_type text,
   park_facility_name text,
   park_borough text,
   vehicle_type text,
   taxi_company_borough text,
   taxi_pickup_location text,
   bridge_highway_name text,
   bridge_highway_description text,
   road_ramp text,
   bridge_highway_segment text,
   latitude numeric,
   longitude numeric,
   location text
)

5.4

docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/Building_Footprints.geojson 
\ -nln nyc_building_footprints -lco GEOMETRY_NAME=geom

5.5

docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/2015_NYC_Tree_Census.geojson \
-nln nyc_2015_tree_census -lco GEOMETRY_NAME=geom

5.6

docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/nyc_mappluto_22v3_shp/MapPLUTO.shp \
-nln nyc_mappluto -lco GEOMETRY_NAME=geom \
-nlt MULTIPOLYGON -mapFieldType Real=String

5 - SQL Basics

5.2 ogr2ogr

5.1

ogr2ogr \
    -f PostgreSQL PG:"host=localhost user=user password=password \
    dbname=geo" /Users/matt/Documents/spatial-sql-book/nyc_taxi_yellow_0616-07.parquet \
    -nln nyc_taxi_yellow_0616 -lco GEOMETRY_NAME=geom

5.2

docker run --rm -v /Users:/Users --network="host" osgeo/gdal 
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" \
/Users/mattforrest/Documents/spatial-sql-data/nyc_yellow_taxi_0601_0615_2016.parquet \
-nln nyc_yellow_taxi_0601_0615_2016 -lco GEOMETRY_NAME=geom

5.3

create table nyc_311 (
   id int primary key,
   created_date timestamp,
   closed_date timestamp,
   agency text,
   agency_name text,
   complaint_type text,
   descriptor text,
   location_type text,
   incident_zip text,
   incident_address text,
   street_name text,
   cross_street_1 text,
   cross_street_2 text,
   intersection_street_1 text,
   intersection_street_2 text,
   address_type text,
   city text,
   landmark text,
   facility_type text,
   status text,
   due_date timestamp,
   resolution_description text,
   resolution_action_updated_date timestamp,
   community_board text,
   bbl text,
   borough text,
   x_coordinate_planar numeric,
   y_coordinate_planar numeric,
   open_data_channel_type text,
   park_facility_name text,
   park_borough text,
   vehicle_type text,
   taxi_company_borough text,
   taxi_pickup_location text,
   bridge_highway_name text,
   bridge_highway_description text,
   road_ramp text,
   bridge_highway_segment text,
   latitude numeric,
   longitude numeric,
   location text
)

5.4

docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/Building_Footprints.geojson 
\ -nln nyc_building_footprints -lco GEOMETRY_NAME=geom

5.5

docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/2015_NYC_Tree_Census.geojson \
-nln nyc_2015_tree_census -lco GEOMETRY_NAME=geom

5.6

docker run --rm -v //Users:/Users --network="host" osgeo/gdal \
ogr2ogr -f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis" \
/Users/matt/Desktop/Desktop/spatial-sql-book/nyc_mappluto_22v3_shp/MapPLUTO.shp \
-nln nyc_mappluto -lco GEOMETRY_NAME=geom \
-nlt MULTIPOLYGON -mapFieldType Real=String

5.3 SQL Data Types

5.7

select
    complaint_type,
    location_type,
    city
from
    nyc_311
limit
    5

5.8

select
    complaint_type,
    location_type,
    city
from
    nyc_311
where
    city = 'Brooklyn'
limit
    5

5.9

select
    complaint_type,
    location_type,
    city,
    city = 'BROOKLYN'
from
    nyc_311
limit
    5

5.4 Charecters

5.10

select
    spc_common,
    nta_name, health
from
    nyc_2015_tree_census
limit
    5

5.11

select
    spc_common || ' - ' || health as joined
from
    nyc_2015_tree_census
limit
    5

5.12

select
    concat(spc_common, ' - ', health)
from
    nyc_2015_tree_census
limit
    5

5.13

select
    spc_common,
    left(spc_common, 5) as left,
    right(spc_common, 3) as right
from
    nyc_2015_tree_census
limit
    5

5.14

select
    spc_common,
    initcap(spc_common) as titlecase,
    upper(spc_common) as uppercase,
    lower(spc_common) as lowercase
from
    nyc_2015_tree_census
limit
    5

5.15

select
    spc_common,
    replace(spc_common, 'locust', ' locust') as new_text
from
    nyc_2015_tree_census
limit
    5

5.16

select
    spc_common,
    reverse(spc_common) as backwards
from
    nyc_2015_tree_census
limit
    5

5.17

select
    spc_common,
    length(spc_common) as how_long
from
    nyc_2015_tree_census
limit
    5

5.18

select
    spc_common,
    split_part(spc_common, ' ', 2) as tree_group
from
    nyc_2015_tree_census
limit
    5

5.19

select
	spc_common,
	split_part(
		replace(spc_common, 'locust', ' locust'),
		' ',
		2
	) as tree_group
from
	nyc_2015_tree_census
limit
	5

5.5 Numeric

5.20

select
    total_amount,
    tip_amount,
    tip_amount /(total_amount - tip_amount) as tip_percent
from
    nyc_yellow_taxi_0601_0615_2016
where
    tip_amount > 0
    and trip_distance > 2
limit
    5

5.6 Dates and Times

5.21

select
    pickup_datetime,
    dropoff_datetime
from
    nyc_yellow_taxi_0601_0615_2016
where
    tip_amount > 0
    and trip_distance > 2
limit
    5

5.22

select
    to_char(pickup_datetime, 'DD Mon YYYY') as start_date,
    to_char(dropoff_datetime, 'DD Mon YYYY') as end_date
from
    nyc_yellow_taxi_0601_0615_2016
where
    tip_amount > 0
    and trip_distance > 2
limit
    5

5.23

select
    to_char(pickup_datetime, 'D Month YYYY') as start_date,
    to_char(dropoff_datetime, 'D Month YYYY') as end_date
from
    nyc_yellow_taxi_0601_0615_2016
where
    tip_amount > 0
    and trip_distance > 2
limit
    5

5.24

select
    to_char(pickup_datetime, 'Day, Month FMDDth, YYYY') as start_date,
    to_char(dropoff_datetime, 'Day, Month FMDDth, YYYY ') as end_date
from
    nyc_yellow_taxi_0601_0615_2016
where
    tip_amount > 0
    and trip_distance > 2
limit
    5

5.25

select
    dropoff_datetime - pickup_datetime as duration
from
    nyc_yellow_taxi_0601_0615_2016
where
    tip_amount > 0
    and trip_distance > 2
limit
    5

5.26

select
    extract(
        dow
        from
            pickup_datetime
    ) as day_of_week,
    extract(
        hour
        from
            pickup_datetime
    ) as hour_of_day
from
    nyc_yellow_taxi_0601_0615_2016
where
    tip_amount > 0
    and trip_distance > 2
limit
    5

5.7 Other data types

5.27

with json_table as (
    select
        JSON(
            '{"first": "Matt",
            "last": "Forrest",
            "age": 35}'
        ) as data
)

select
    data
from
    json_table 

5.28

with json_table as (
    select
        JSON(
            '{"first": "Matt",
            "last": "Forrest",
            "age": 35}'
        ) as data
)
select
    data -> 'last'
from
    json_table

5.29

with json_table as (
    select
        JSON(
            '{
                "first": "Matt",
                "last": "Forrest",
                "age": 35,
                "cities": ["Minneapolis", "Madison", "New York City"],
                "skills": {"SQL": true, "Python": true, "Java": false}
            }'
        ) as data
)
select
    data
from
    json_table

5.30

with json_table as (
    select
        JSON(
            '{
                "first": "Matt",
                "last": "Forrest",
                "age": 35,
                "cities": ["Minneapolis", "Madison", "New York City"],
                "skills": {"SQL": true, "Python": true, "Java": false}
            }'
        ) as data
)
select
    data -> 'cities' -> 1 as city,
    data -> 'skills' -> 'SQL' as sql_skills
from
    json_table

5.31

select
    cast(zipcode as numeric)
from
    nyc_zips
limit
    3

5.32

select
    zipcode :: numeric
from
    nyc_zips
limit
    3

5.8 Basic SQL Operators

5.33

select
    *
from
    nyc_2015_tree_census
where
    health = 'Fair'

5.34

select
    *
from
    nyc_2015_tree_census
where
    stump_diam > 0

5.35

select
    *
from
    nyc_2015_tree_census
where
    stump_diam :: numeric > 0

5.36

select
    spc_common
from
    nyc_2015_tree_census
where
    spc_common > 'Maple'

5.37

select
    *
from
    public.nyc_311
where
    complaint_type = 'Illegal Fireworks'
    and city = 'BROOKLYN'
limit
    25

5.38

select
    *
from
    public.nyc_311
where
    complaint_type = 'Illegal Fireworks'
    or agency = 'NYPD'
limit
    25

5.39

select
    *
from
    nyc_311
where
    complaint_type IN ('Illegal Fireworks', 'Noise - Residential')
limit
    25

5.40

select
    *
from
    nyc_311
where
    complaint_type IN ('Illegal Fireworks', 'Noise - Residential')
    and descriptor IN ('Loud Music/Party', 'Banging/Pounding')
limit
    25

5.41

select
    *
from
    nyc_311
where
    complaint_type IN ('Illegal Fireworks', 'Noise - Residential')
    and descriptor NOT IN ('Loud Music/Party', 'Banging/Pounding')
limit
    25

5.42

select
    *
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:05:00'

5.43

-- All return true since it matches the exact word

select
    'maple' like '%maple', --true  
    'maple' like 'maple%', --true  
    'maple' like '%maple%' --true  
    
-- The first returns false since the phrase does not end in the word "maple" 
select  
    'maple syrup' like '%maple', --false  
    'maple syrup' like 'maple%', --true  
    'maple syrup' like '%maple%' --true   

-- The second returns false since the phrase does not begin with the word "maple"  

select  
    'red maple' like '%maple', --true  
    'red maple' like 'maple%', --false  
    'red maple' like '%maple%' --true

5.44

select
    spc_common
from
    public.nyc_2015_tree_census
where
    spc_common like '%maple%

5.45

SELECT
    'maple' like 'm___', --false
    'maple' like 'm____', --true 
    'maple' like 'm_____' --false

5.46

select
    *
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_longitude IS NULL

5.47

select
    distinct spc_common
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
limit
    5

5.9 Aggregates and GROUP BY

5.48

select
    nta_name,
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
group by
    nta_name
limit
    5

5.49

select
    nta_name,
    problems,
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
group by
    nta_name,
    problems
limit
    5

5.50

select
    nta_name,
    array_agg(distinct curb_loc),
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
group by
    nta_name
limit
    3

5.51

select
    nta_name,
    avg(stump_diam :: numeric)
from
    nyc_2015_tree_census
where
    stump_diam :: numeric > 0
group by
    nta_name
limit
    3

5.52

select
    passenger_count,
    avg(tip_amount) filter (
        where
            tip_amount > 5
    )
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:05:00'
group by
    passenger_count

5.53

select
    passenger_count,
    avg(tip_amount) filter (
        where
            tip_amount > 5
    ),
    count(ogc_fid)
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:05:00'
    and count(ogc_fid) > 50
group by
    passenger_count

5.54

select
    passenger_count,
    avg(tip_amount) filter (
        where
            tip_amount > 5
    ),
    count(ogc_fid)
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:05:00'
group by
    passenger_count
having
    count(ogc_fid) > 50

5.55

select
    passenger_count,
    tip_amount
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:05:00'
order by
    tip_amount desc
limit
    5

5.56

select
    nta_name,
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
group by
    nta_name
order by
    count(ogc_fid) desc
limit
    5

5.57

select
    nta_name,
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
group by
    nta_name
order by
    count(ogc_fid) desc
limit
    5 offset 5

6 - Advanced SQL topics for spatial SQL

6.1 CASE/WHEN Conditionals

6.2

case
    when temp > 35 then 'Super Hot !'
    when temp between 30
    and 35 then 'Hot'
    when temp between 25
    and 30 then 'Pretty Warm'
    when temp between 20
    and 25 then 'Warm'
    when temp between 15
    and 20 then 'Cool / Warm'
    when temp between 10
    and 15 then 'Cool'
    when temp between 5
    and 10 then 'Chilly'
    when temp between 0
    and 5 then 'Cold'
    when temp between -5
    and 0 then 'Pretty Cold'
    when temp between -10
    and -5 then 'Very Cold'
    when temp between -15
    and -10 then 'Brrrrr'
    when temp > -15 then 'Frigid !'
    else null
end

6.3

select
    spc_common,
    case
        when spc_common like '%maple%' then 'Maple'
        else 'Not a Maple'
    end as is_maple
from
    nyc_2015_tree_census
limit
    10

6.4

select
    fare_amount,
    tip_amount,
    case
        when tip_amount / fare_amount between.15
        and.2 then 'Good'
        when tip_amount / fare_amount between.2
        and.25 then 'Great'
        when tip_amount / fare_amount between.25
        and.3 then 'Amazing'
        when tip_amount / fare_amount >.3 then 'Awesome'
        else 'Not Great'
    end as tip_class
from
    nyc_yellow_taxi_0601_0615_2016
limit
    10

6.5

select
    nta_name,
    sum(
        case
            when spc_common like '%maple%' then 1
            else 0
        end
    ) just_maples,
    count(ogc_fid) as all_trees
from
    nyc_2015_tree_census
group by
    nta_name
limit
    5

6 - Advanced SQL topics for spatial SQL

6.1 CASE/WHEN Conditionals

6.2

case
    when temp > 35 then 'Super Hot !'
    when temp between 30
    and 35 then 'Hot'
    when temp between 25
    and 30 then 'Pretty Warm'
    when temp between 20
    and 25 then 'Warm'
    when temp between 15
    and 20 then 'Cool / Warm'
    when temp between 10
    and 15 then 'Cool'
    when temp between 5
    and 10 then 'Chilly'
    when temp between 0
    and 5 then 'Cold'
    when temp between -5
    and 0 then 'Pretty Cold'
    when temp between -10
    and -5 then 'Very Cold'
    when temp between -15
    and -10 then 'Brrrrr'
    when temp > -15 then 'Frigid !'
    else null
end

6.3

select
    spc_common,
    case
        when spc_common like '%maple%' then 'Maple'
        else 'Not a Maple'
    end as is_maple
from
    nyc_2015_tree_census
limit
    10

6.4

select
    fare_amount,
    tip_amount,
    case
        when tip_amount / fare_amount between.15
        and.2 then 'Good'
        when tip_amount / fare_amount between.2
        and.25 then 'Great'
        when tip_amount / fare_amount between.25
        and.3 then 'Amazing'
        when tip_amount / fare_amount >.3 then 'Awesome'
        else 'Not Great'
    end as tip_class
from
    nyc_yellow_taxi_0601_0615_2016
limit
    10

6.5

select
    nta_name,
    sum(
        case
            when spc_common like '%maple%' then 1
            else 0
        end
    ) just_maples,
    count(ogc_fid) as all_trees
from
    nyc_2015_tree_census
group by
    nta_name
limit
    5

6.2 Common Table Expressions (CTEs) and Subqueries

6.6

select
    zipcode,
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
    and zipcode in (
        select
            zipcode
        from
            nyc_zips
        where
            population > 100000
    )
group by
    zipcode

6.7

select
    zipcode,
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    spc_common like '%maple%'
    and zipcode in ('11226', '11368', '11373')
group by
    zipcode

6.8

select
    zipcode,
    count(ogc_fid)
from
    nyc_2015_tree_census
where
    zipcode = (
        select
            zipcode
        from
            nyc_zips
        order by
            population asc
        limit
            1
    )
group by
    zipcode

6.9

with lanes as (
    select
        ogc_fid,
        lanecount
    from
        nyc_bike_routes
    order by
        lanecount desc
    limit
        3
)
select
    *
from
    lanes

6.10

with lanes as (
    select
        ogc_fid,
        lanecount
    from
        nyc_bike_routes
    order by
        lanecount desc
    limit
        3
), lanes_2 as (
    select
        ogc_fid,
        lanecount
    from
        nyc_bike_routes
    order by
        lanecount desc
    limit
        3 offset 12
)
select
    *
from
    lanes 

-- the UNION operator allows us to bring 
-- two tables with matching columns together  

union  
select
    *
from
    lanes_2

6.3 CRUD: Create, Read, Update, and Delete

6.12

create table test (city text, country text, size_rank numeric)

6.13

insert into
    test (city, country, size_rank)
values
    ('Tokyo', 'Japan', 1),
    ('Delhi', 'India', 2),
    ('Shanghai', 'China', 3),
    ('São Paulo', 'Brazil', 4),
    ('Mexico City', 'Mexico', 5)

6.14

alter table
    test
add
    column population numeric

6.15

alter table
    test rename column population to city_pop

6.16

alter table
    test
alter column
    city_pop type int

6.17

update
    test
set
    city = 'Ciudad de México'
where
    city = 'Mexico City'

6.18

alter table
    test rename to world_cities

6.19

alter table
    world_cities drop column city_pop

6.20

delete from
    world_cities
where
    city = 'Tokyo'

6.21

drop table world_cities

6.4 Statistical functions

6.22

select
    corr(assesstot :: numeric, lotarea :: numeric)
from
    nyc_mappluto

6.23

select
    stddev_samp(lotarea :: numeric)
from
    nyc_mappluto
where
    borough = 'BK'

6.24

select
    var_samp(lotarea :: numeric)
from
    nyc_mappluto
where
    borough = 'BK'

6.25

select
    mode() within group (
        order by
            lotarea :: numeric desc
    )
from
    nyc_mappluto
where
    borough = 'BK'

6.26

select
    ogc_fid,
    tip_amount,
    percent_rank() over(
        order by
            tip_amount asc
    )
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:00:05'
    and tip_amount > 0

6.27

select
    ogc_fid,
    tip_amount,
    rank() over(
        order by
            tip_amount asc
    )
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:00:05'
    and tip_amount > 0

6.28

select
    ogc_fid,
    tip_amount,
    dense_rank() over(
        order by
            tip_amount asc
    )
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:00:05'
    and tip_amount > 0

6.29

select
    percentile_disc(0.25) within group (
        order by
            tip_amount
    ) as per_25,
    percentile_disc(0.5) within group (
        order by
            tip_amount
    ) as per_50,
    percentile_disc(0.75) within group (
        order by
            tip_amount
    ) as per_75
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime between '2016-06-10 15:00:00'
    and '2016-06-10 15:00:05'
    and tip_amount > 0

6.5 Windows

6.30

with taxis as (
    select
        sum(total_amount) as total,
        pickup_datetime :: date as date
    from
        nyc_yellow_taxi_0601_0615_2016
    group by
        pickup_datetime :: date
    order by
        pickup_datetime :: date asc
)
select
    date,
    total,
    avg(total) over (
        order by
            date rows between 2 preceding
            and current row
    )
from
    taxis

6.31

with taxis as (
    select
        sum(total_amount) as total,
        passenger_count,
        pickup_datetime :: date as date
    from
        nyc_yellow_taxi_0601_0615_2016
    group by
        pickup_datetime :: date,
        passenger_count
    order by
        pickup_datetime :: date asc,
        passenger_count desc
)
select
    date,
    total,
    passenger_count,
    sum(total) over (
        partition by passenger_count
        order by
            date rows between 2 preceding
            and current row
    )
from
    taxis

6.32

select
    avg(tip_amount / total_amount),
    extract(
        hour
        from
            pickup_datetime
    ) as hour_of_day
from
    nyc_yellow_taxi_0601_0615_2016
where
    pickup_datetime :: date = '2016-06-15'

    -- since we can't divide by 0 we will remove all amounts 
    -- that equal 0
    
    and total_amount > 0
group by
    extract(
        hour
        from
            pickup_datetime
    )
order by
    extract(
        hour
        from
            pickup_datetime
    ) asc

6.33

with taxis as (
    select
        avg(tip_amount / total_amount) as tip_percentage,
        date_trunc('hour', pickup_datetime) as day_hour
    from
        nyc_yellow_taxi_0601_0615_2016
    where
        total_amount > 5
    group by
        date_trunc('hour', pickup_datetime)
)
select
    day_hour,
    tip_percentage,
    avg(tip_percentage) over (
        order by
            day_hour asc rows between 5 preceding
            and current row
    ) as moving_average
from
    taxis

6.34

with taxis as (
    select
        sum(total_amount) as total,
        passenger_count,
        pickup_datetime :: date as date
    from
        nyc_yellow_taxi_0601_0615_2016
    group by
        pickup_datetime :: date,
        passenger_count
    order by
        pickup_datetime :: date asc,
        passenger_count desc
)
select
    date,
    total,
    passenger_count,
    sum(total) over (
        partition by passenger_count
        order by
            date
    )
from
    taxis

6.35

select
    row_number() over() as row_no,
    ogc_fid
from
    nyc_yellow_taxi_0601_0615_2016
limit
    5

6.36

select
    row_number() over(partition by pickup_datetime) as row_no,
    ogc_fid,
    pickup_datetime
from
    nyc_yellow_taxi_0601_0615_2016
limit
    5 offset 100000 -- using offset since the first part of the datasets has all passenger counts as 0

6.37

with taxis as (
    select
        sum(total_amount) as total,
        passenger_count,
        pickup_datetime :: date as date
    from
        nyc_yellow_taxi_0601_0615_2016
    group by
        pickup_datetime :: date,
        passenger_count
    order by
        pickup_datetime :: date asc,
        passenger_count desc
)
select
    date,
    total,
    passenger_count,
    total - lag(total, 1) over (
        partition by passenger_count
        order by
            date
    )
from
    taxis

6.6 Joins

6.38

select
    nyc_311.complaint_type,
    nyc_311.incident_zip,
    nyc_zips.population
from
    nyc_311
    join nyc_zips on nyc_311.incident_zip = nyc_zips.zipcode
limit
    5

6.39

select
    nyc_311.complaint_type,
    nyc_311.incident_zip,
    nyc_zips.population
from
    nyc_311
    join nyc_zips on nyc_311.incident_zip = nyc_zips.zipcode
where
    nyc_zips.population > 80000
limit
    5

6.40

select
    a.complaint_type,
    a.incident_zip,
    b.population
from
    nyc_311 a
    join nyc_zips b on a.incident_zip = b.zipcode
where
    b.population > 80000
limit
    5

6.41

with b as (select population, zipcode from nyc_zips limit 30)

select
    a.complaint_type,
    a.incident_zip,
    b.population
from
    nyc_311 a
    left join b on a.incident_zip = b.zipcode
limit
    5

6.42

with a as (
    select
        complaint_type,
        incident_zip
    from
        nyc_311
    limit
        30
)
select
    a.complaint_type,
    a.incident_zip,
    b.population
from
    a
    right join nyc_zips b on a.incident_zip = b.zipcode
limit
    5

6.43

with a as (
    select
        complaint_type,
        incident_zip
    from
        nyc_311
    limit
        30
), b as (
    select
        population,
        zipcode
    from
        nyc_zips
    limit
        30
)
select
    a.complaint_type,
    a.incident_zip,
    b.population
from
    a full
    outer join b on a.incident_zip = b.zipcode
limit
    100

6.44

with a as (
    select
        neighborhood
    from
        nyc_neighborhoods
    limit
        2
), b as (
    select
        population,
        zipcode
    from
        nyc_zips
    limit
        2
)
select
    a.neighborhood,
    b.population
from
    a
    cross join b

6.45

with a as (
    select
        neighborhood
    from
        nyc_neighborhoods
    limit
        2
), b as (
    select
        population,
        zipcode
    from
        nyc_zips
    limit
        2
)
select
    a.neighborhood,
    b.population
from
    a,
    b

6.46

with a as (
    select
        neighborhood
    from
        nyc_neighborhoods
    limit
        2
), b as (
    select
        population,
        zipcode
    from
        nyc_zips
    limit
        2
)
select
    a.neighborhood,
    b.population,
    b.population / 1000 as calculation
from
    a,
    b

6.47

with a as (
    select
        ogc_fid,
        zipcode :: text
    from
        nyc_mappluto
)
select
    count(a.ogc_fid),
    b.zipcode
from
    nyc_zips b
    join a using(zipcode)
group by
    b.zipcode
order by
    count(a.ogc_fid) desc

6.49

with a as (
    select
        ogc_fid,
        zipcode :: text
    from
        nyc_mappluto
    order by
        ogc_fid desc
    limit
        5000
), c as (
    select
        ogc_fid,
        zipcode
    from
        nyc_2015_tree_census
    order by
        ogc_fid desc
    limit
        5000
)
select
    count(a.ogc_fid) as buildings,
    -- count(c.ogc_fid) as trees,  
    b.zipcode
from
    nyc_zips b
    join a using(zipcode) 
    -- join c  
    -- using(zipcode)  
group by
    b.zipcode
order by
    count(a.ogc_fid) desc

6.50

with a as (
    select
        ogc_fid,
        zipcode :: text
    from
        nyc_mappluto
    order by
        ogc_fid desc
    limit
        5000
), c as (
    select
        ogc_fid,
        zipcode
    from
        nyc_2015_tree_census
    order by
        ogc_fid desc
    limit
        5000
)
select
    -- count(a.ogc_fid) as buildings, 
    count(c.ogc_fid) as trees,
    b.zipcode
from
    nyc_zips b 
    -- join a using(zipcode)  
    join c using(zipcode)
group by
    b.zipcode 
    -- order by count(a.ogc_fid) desc  
order by
    count(c.ogc_fid) desc

6.51

with a as (
    select
        count(ogc_fid) as buildings,
        zipcode :: text
    from
        nyc_mappluto
    group by
        zipcode
),
b as (
    select
        count(ogc_fid) as trees,
        zipcode
    from
        nyc_2015_tree_census
    group by
        zipcode
)
select
    a.buildings,
    b.trees,
    c.zipcode
from
    nyc_zips c
    join a using(zipcode)
    join b using(zipcode)
order by
    b.trees desc

6.7 Lateral Joins

6.52

select
    a.neighborhood,
    trees.trees_per_sq_meter
from
    nyc_neighborhoods a
    cross join lateral (
        select
            count(ogc_fid) / a.area :: numeric as trees_per_sq_meter
        from
            nyc_2015_tree_census
        where
            a.neighborhood = neighborhood
    ) trees
order by
    trees.trees_per_sq_meter desc

6.8 Triggers

6.53

create table cities (
    city text,
    country text,
    size_rank numeric,
    time_zone text,
    time_zone_abbrev text
)

6.54

create
or replace function set_timezones() returns trigger language plpgsql as $$ begin
update
    cities
set
    time_zone = a.name,
    time_zone_abbrev = a.abbrev
from
    pg_timezone_names a
where
    a.name like '%' || replace(city, ' ', ' _ ') || '%';

return new;

end;

$$

6.55

create trigger update_city_tz
after
insert
    on cities for each row execute procedure set_timezones();

6.56

insert into
    cities (city, country, size_rank)
values
    ('Tokyo', 'Japan', 1),
    ('Delhi', 'India', 2),
    ('Shanghai', 'China', 3),
    ('São Paulo', 'Brazil', 4),
    ('Mexico City', 'Mexico', 5)

6.57

create
or replace function set_timezones() returns trigger language plpgsql as $$ begin
update
    cities
set
    time_zone = data.name,
    time_zone_abbrev = data.abbrev
from
    (
        select
            name,
            abbrev
        from
            pg_timezone_names
        where
            name like '%' || replace(city, ' ', '_') || '%'
    ) as data;

return new;

end;

$$

6.9 UDFs

6.59

create
or replace function tip_percentage(tip_column float, total_column float) 
returns numeric 
language plpgsql 
as $$ 

declare tip_percentage numeric;

begin if total_column = 0 then tip_percentage = 0;

elsif total_column is null then tip_percentage = 0;

elsif total_column > 0 then tip_percentage = tip_column / total_column;

end if;

return tip_percentage;

end;

$$

6.60

select
    total_amount,
    tip_amount,
    tip_percentage(tip_amount, total_amount)
from
    nyc_yellow_taxi_0601_0615_2016
order by
    pickup_datetime desc
limit
    10 offset 10000

6.61


6.62

select
    *
from
    find_311_text_match('food')
limit
    5

7 - Using the GEOMETRY

7.2 GEOMETRY Types

7.2

select
   geom
from
   nyc_building_footprints
limit
   1

7.3

select
   st_astext(geom) as wkt
from
   nyc_building_footprints
limit
   5

7.4

insert into
    geometries
values
    ('point', st_geomfromtext('POINT(0 0)')),
    (
        'line',
        st_geomfromtext('LINESTRING(0 0,1 1,1 2)')
    ),
    (
        'polygon',
        st_geomfromtext(
            'POLYGON((0 0,4 0,4 4,0 4,0 0),(1 1, 2 1, 2 2, 1 2,1 1))'
        )
    )

7.5

insert into
    geometries
values
    (
        'multipoint',
        st_geomfromtext('MULTIPOINT((0 0),(1 2))')
    ),
    (
        'multiline',
        st_geomfromtext('MULTILINESTRING((0 0,1 1,1 2),(2 3,3 2,5 4))')
    ),
    (
        'multipolygon',
        st_geomfromtext(
            'MULTIPOLYGON(((0 0,4 0,4 4,0 4,0 0),(1 1,2 1,2 2,1 2,1 1)), ((-1 -1,-1 -2,-2 -2,-2 -1,-1 -1)))'
        )
    ),
    (
        'geometry collection',
        st_geomfromtext(
            'GEOMETRYCOLLECTION(POINT(2 3),LINESTRING(2 3,3 4))'
        )
    )

7.6

select
    st_curvetoline(
        st_geomfromtext('CIRCULARSTRING(0 0, 4 0, 4 4, 0 4, 0 0)')
    ) as geom

7 - Using the GEOMETRY

7.2 GEOMETRY Types

7.2

select
   geom
from
   nyc_building_footprints
limit
   1

7.3

select
   st_astext(geom) as wkt
from
   nyc_building_footprints
limit
   5

7.4

insert into
    geometries
values
    ('point', st_geomfromtext('POINT(0 0)')),
    (
        'line',
        st_geomfromtext('LINESTRING(0 0,1 1,1 2)')
    ),
    (
        'polygon',
        st_geomfromtext(
            'POLYGON((0 0,4 0,4 4,0 4,0 0),(1 1, 2 1, 2 2, 1 2,1 1))'
        )
    )

7.5

insert into
    geometries
values
    (
        'multipoint',
        st_geomfromtext('MULTIPOINT((0 0),(1 2))')
    ),
    (
        'multiline',
        st_geomfromtext('MULTILINESTRING((0 0,1 1,1 2),(2 3,3 2,5 4))')
    ),
    (
        'multipolygon',
        st_geomfromtext(
            'MULTIPOLYGON(((0 0,4 0,4 4,0 4,0 0),(1 1,2 1,2 2,1 2,1 1)), ((-1 -1,-1 -2,-2 -2,-2 -1,-1 -1)))'
        )
    ),
    (
        'geometry collection',
        st_geomfromtext(
            'GEOMETRYCOLLECTION(POINT(2 3),LINESTRING(2 3,3 4))'
        )
    )

7.6

select
    st_curvetoline(
        st_geomfromtext('CIRCULARSTRING(0 0, 4 0, 4 4, 0 4, 0 0)')
    ) as geom

7.3 Size of GEOMETRY data

7.7

select
    st_memsize(st_geomfromtext('POINT(0 0)')) as geom

7.8

select
    st_memsize(st_geomfromtext('LINESTRING(0 0, 0 1)')) as geom

7.9

select
    st_memsize(
        st_geomfromtext('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
    ) as geom

7.10

select
    st_memsize(geom)
from
    nyc_neighborhoods
where
    neighborhood = 'College Point'

7.11

select
    st_npoints(geom) as points,
    st_geometrytype(geom) as type,
    st_numgeometries(geom) as geometries
from
    nyc_neighborhoods
where
    neighborhood = 'College Point'

7.12

select
    st_memsize(
        st_geomfromtext('MULTIPOLYGON(((0 0, 1 0, 1 1, 0 1, 0 0)))')
    ) as geom

7.13

select
    st_memsize(
        st_geomfromtext(
            'MULTIPOLYGON(((0 0, 1 0, 1 1, 0 1, 0 0)), ((1 1, 2 1, 2 2, 1 2, 1 1)))'
        )
    ) as geom

7.4 A note on PostGIS documentation

7.14

select
    st_length(
        st_geomfromtext(
            'LINESTRING(743238 2967416,743238 2967450,743265 2967450, 
            743265.625 2967416,743238 2967416)',
            2249
        )
    );

-- Transforming WGS 84 LINESTRING to Massachusetts State Plane Meters

7.15

select
    st_length(
        st_transform(
            st_geomfromewkt(
                'srid=4326;linestring(-72.1260 42.45, -72.1240 42.45666, -72.123 42.1546)'
            ),
            26986
        )
    );

7.6 Constructors

7.16

with a as (
    select
        geom
    from
        nyc_zips
    limit
        5
)
select
    st_collect(geom)
from
    a

7.17

with a as (
    select
        geom,
        population,
        zipcode
    from
        nyc_zips
    limit
        5
)
select
    string_agg(zipcode, ',') as zips,
    sum(population) as total_pop,
    st_collect(geom)
from
    a

7.18

select
    st_makeenvelope(-66.125091, 18.296531, -65.99142, 18.471986) as geom

7.19

select
    st_makeenvelope(
        -66.125091,
        18.296531,
        -65.99142,
        18.471986,
        4326
    ) as geom

7.20

select
    st_makepoint(pickup_longitude, pickup_latitude) as geom
from
    nyc_yellow_taxi_0601_0615_2016
limit
    100

7.21

select
    ogc_fid,
    st_setsrid(
        st_makepoint(pickup_longitude, pickup_latitude),
        4326
    ) as geom
from
    nyc_yellow_taxi_0601_0615_2016
limit
    100

7.22

create or replace function BuildPoint(x numeric, y numeric, srid int) 
    returns geometry 
    language plpgsql 
    as $$ 
        begin 
        return st_setsrid(st_makepoint(x, y), srid);
    end;
$$;

7.23

select
    ogc_fid,
    buildpoint(
        pickup_longitude :: numeric,
        pickup_latitude :: numeric,
        4326
    ) as geom
from
    nyc_yellow_taxi_0601_0615_2016
limit
    100

7.24

SELECT
    st_setsrid(
        ST_Translate(ST_Scale(ST_Letters('Spatial SQL'), 1, 1), 0, 0),
        4326
    );

7.7 Accessors

7.25

select
    st_dump(geom) as geom
from
    nyc_neighborhoods
where
    neighborhood = 'City Island'

7.26

select
    (st_dump(geom)).geom as geom
from
    nyc_neighborhoods
where
    neighborhood = 'City Island'

7.27

select
    st_geometrytype(geom)
from
    nyc_neighborhoods
where
    neighborhood = 'City Island'

7.28

select
    st_memsize(geom)
from
    nyc_neighborhoods
where
    neighborhood = 'City Island'

7.29

select
    st_npoints(geom)
from
    nyc_neighborhoods
where
    neighborhood = 'City Island'

7.30

select
    st_pointn(geom, 1) as geom
from
    nyc_bike_routes
where
    segmentid = '331385'

7.31

select
    st_geometrytype(geom) as geom
from
    nyc_bike_routes
where
    segmentid = '331385'

7.32

select
    st_pointn(st_linemerge(geom), 1) as geom
from
    nyc_bike_routes
where
    segmentid = '331385'

7.9 Validators

7.33

select
    *
from
    nyc_building_footprints
where
    st_isvalid(geom) is false

7.34

select
    mpluto_bbl,
    st_isvaliddetail(geom)
from
    nyc_building_footprints
where
    mpluto_bbl in ('1022430261', '1016710039', '4039760001')

7.35

select
    mpluto_bbl,
    st_isvalidreason(geom)
from
    nyc_building_footprints
where
    mpluto_bbl in ('1022430261', '1016710039', '4039760001')

7.36

select
    mpluto_bbl,
    st_isvalid(st_makevalid(geom))
from
    nyc_building_footprints
where
    mpluto_bbl in ('1022430261', '1016710039', '4039760001')

7.37

select
    st_srid(geom)
from
    nyc_building_footprints
limit
    3

7.38

select
    ogc_fid,
    st_transform(geom, 2263) as geom
from
    nyc_building_footprints
limit
    3

7.10 Inputs

7.39

select
    st_geomfromtext('POINT(-73.9772294 40.7527262)', 4326) as geom

7.11 Outputs

7.40

select
    st_asgeojson(geom)
from
    nyc_building_footprints
limit
    3

7.41

select
    st_geomfromtext('POINT(-73.9772294 40.7527262)', 4326) as geom

7.42

select
    st_buffer(st_transform(geom, 4326) :: geography, -200)
from
    nyc_zips
limit
    10

7.43

select
    st_centroid(st_transform(geom, 4326) :: geography)
from
    nyc_zips
limit
    10

7.44

select
    st_transform(geom, 4326) as original,
    st_chaikinsmoothing(st_transform(geom, 4326), 1) as one,
    st_chaikinsmoothing(st_transform(geom, 4326), 5) as five
from
    nyc_zips
limit
    10

7.45

-- Find the first 50 trees in Fort Greene with the
-- Latitude and longitude columns
with a as (
    select
        latitude,
        longitude
    from
        nyc_2015_tree_census
    where
        nta_name = 'Fort Greene'
    limit
        50
), 

-- Turn the latitude/longitude columns into geometries
b as (
    select
        st_collect(
            buildpoint(longitude :: numeric, latitude :: numeric, 4326)
        ) as geom
    from
        a
)

-- Create multiple concave hulls with various concave-ness
-- and use UNION to turn them into a single table
select
    st_concavehull(geom, 0.1) as hull
from
    b
union
select
    st_concavehull(geom, 0.3) as hull
from
    b
union
select
    st_concavehull(geom, 0.7) as hull
from
    b

7.46

with a as (
    select
        latitude,
        longitude
    from
        nyc_2015_tree_census
    where
        nta_name = 'Fort Greene'
    limit
        50
), b as (
    select
        st_collect(
            buildpoint(longitude :: numeric, latitude :: numeric, 4326)
        ) as geom
    from
        a
)
select
    st_convexhull(geom) as hull
from
    b

7.47

select
    st_delaunaytriangles(st_transform(geom, 4326)) as triangles
from
    nyc_zips
where
    zipcode = '10009'

7.48

select
    st_generatepoints(st_transform(geom, 4326), 500) as points
from
    nyc_zips
where
    zipcode = '10009'

7.49

select
st_linemerge(geom) as geom from
nyc_bike_routes
where
street = '7 AV'
and fromstreet = '42 ST'
and tostreet = '65 ST'

7.50

-- Create multiple geometries with different simplification levels
-- and UNION them into one table
select
    st_transform(geom, 4326) as geom
from
    nyc_zips
where
    zipcode = '11693'
union
select
    st_transform(st_simplify(geom, 1), 4326) as geom
from
    nyc_zips
where
    zipcode = '11693'
union
select
    st_transform(st_simplify(geom, 10), 4326) as geom
from
    nyc_zips
where
    zipcode = '11693'
union
select
    st_transform(st_simplify(geom, 50), 4326) as geom
from
    nyc_zips
where
    zipcode = '11693'

7.51

-- Create multiple geometries that share borders with different 
-- simplification levels and UNION them into one table
select
    st_transform(geom, 4326) as geom
from
    nyc_zips
where
    zipcode = '11434'
    or st_touches(
        geom,
        (
            select
                geom
            from
                nyc_zips
            where
                zipcode = '11434'
        )
    )
union
select
    st_transform(st_simplifypreservetopology(geom, 1), 4326) as geom
from
    nyc_zips
where
    zipcode = '11434'
    or st_touches(
        geom,
        (
            select
                geom
            from
                nyc_zips
            where
                zipcode = '11434'
        )
    )
union
select
    st_transform(st_simplifypreservetopology(geom, 10), 4326) as geom
from
    nyc_zips
where
    zipcode = '11434'
    or st_touches(
        geom,
        (
            select
                geom
            from
                nyc_zips
            where
                zipcode = '11434'
        )
    )
union
select
    st_transform(st_simplifypreservetopology(geom, 50), 4326) as geom
from
    nyc_zips
where
    zipcode = '11434'
    or st_touches(
        geom,
        (
            select
                geom
            from
                nyc_zips
            where
                zipcode = '11434'
        )
    )

7.12 Measurements in spatial SQL

7.52

select
    st_area(geom) as area
from
    nyc_building_footprints
limit
    5

7.53

select
    st_area(geom :: geography) as area
from
    nyc_building_footprints
limit
    5

7.54

with one as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10009'
),
two as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    st_closestpoint(one.geom, two.geom) as point
from
    one,
    two

7.55

with one as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10009'
),
two as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    st_distance(one.geom, two.geom) as dist
from
    one,
    two

7.56

with one as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10009'
),
two as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    st_distance(one.geom, two.geom) / 5280 as dist
from
    one,
    two

7.57

with one as (
    select
        geom :: geography as geog
    from
        nyc_zips
    where
        zipcode = '10009'
),
two as (
    select
        geom :: geography as geog
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    st_distance(one.geog, two.geog) / 1609 as dist
from
    one,
    two

7.58

select
    st_srid(geom)
from
    nyc_zips
limit
    1   

7.59

with one as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10009'
),
two as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    st_transform(st_shortestline(one.geom, two.geom), 4326) as line
from
    one,
    two

7.60

select
    st_length(geom :: geography)
from
    nyc_bike_routes
limit
    1

7.61

select
    st_perimeter(geom)
from
    nyc_zips
where
    zipcode = '10009'

7.62

with one as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10009'
),
two as (
    select
        geom
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    st_transform(st_shortestline(one.geom, two.geom), 4326) as line
from
    one,
    two

8 - Spatial relationships

8.2 Ways to use spatial relationship functions

8.1

select
    ogc_fid,
    trip_distance,
    total_amount,
    
    -- ST_Intersects will return a boolean in a column
    st_intersects(

        -- First geometry is at the pickup location using the buildpoint function
        buildpoint(
            pickup_longitude :: numeric,
            pickup_latitude :: numeric,
            4326
        ),

        -- This selects the geometry for the West Village
        (
            select
                geom
            from
                nyc_neighborhoods
            where
                neighborhood = 'West Village'
        )
    )
from
    nyc_yellow_taxi_0601_0615_2016
order by
    pickup_datetime asc
limit
    10

8.2

select
    ogc_fid,
    trip_distance,
    total_amount
from
    nyc_yellow_taxi_0601_0615_2016
where

    -- Using ST_Intersects in the WHERE clause
    st_intersects(

        -- Using ST_Intersects in the WHERE clause, first with the pick up point
        buildpoint(
            pickup_longitude :: numeric,
            pickup_latitude :: numeric,
            4326
        ),

        -- Selecting the geometry for the West Village
        (
            select
                geom
            from
                nyc_neighborhoods
            where
                neighborhood = 'West Village'
        )
    )
order by
    pickup_datetime asc
limit
    10

8.3

alter table
    nyc_yellow_taxi_0601_0615_2016
add
    column pickup geometry,
add
    column dropoff geometry

8.4

update
    nyc_yellow_taxi_0601_0615_2016
set
    pickup = st_setsrid(
        st_makepoint(pickup_longitude, pickup_latitude),
        4326
    ),
    dropoff = st_setsrid(
        st_makepoint(pickup_longitude, pickup_latitude),
        4326
    )

8.5

select
    ogc_fid,
    trip_distance,
    total_amount
from
    nyc_yellow_taxi_0601_0615_2016
where
    st_intersects(
        pickup,
        (
            select
                geom
            from
                nyc_neighborhoods
            where
                neighborhood = 'West Village'
        )
    )
order by
    pickup_datetime asc
limit
    10

8.6

select
    a.ogc_fid,
    a.trip_distance,
    a.total_amount,
    st_intersects(a.pickup, b.geom)
from
    nyc_yellow_taxi_0601_0615_2016 a,
    nyc_neighborhoods b
where
    b.neighborhood = 'West Village'
order by
    a.pickup_datetime asc
limit
    100

8.7

select
    a.ogc_fid,
    a.trip_distance,
    a.total_amount
from
    nyc_yellow_taxi_0601_0615_2016 a

    -- Since ST_Intersects will return true or false
    -- we can use it to evaluate the join
    join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
    b.neighborhood = 'West Village'
order by
    a.pickup_datetime asc
limit
    10

8.8

select
    a.ogc_fid,
    a.trip_distance,
    a.total_amount,
    b.neighborhood
from
    nyc_yellow_taxi_0601_0615_2016 a
    join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
    b.neighborhood = 'West Village'
order by
    a.pickup_datetime asc
limit
    10

8.9

select

    -- Here we can aggregate other data that all falls within our 
    -- joined geometry data
    a.neighborhood,
    sum(b.total_amount) as sum_amount,
    avg(b.total_amount) as avg_amount
from
    nyc_neighborhoods a
    join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
where 
	a.neighborhood = 'West Village'
group by
    a.neighborhood
limit
    5

8.10

with a as (
    select
        a.neighborhood,
        sum(b.total_amount) as sum_amount
    from
        nyc_neighborhoods a
        join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
    where
        a.neighborhood = 'West Village'
    group by
        a.neighborhood
)
select
    a.sum_amount,
    b.*
from
    a
    join nyc_neighborhoods b using (neighborhood)

8.11

select
    zipcode,
    population,
    z.neighbor_sum
from
    nyc_zips a

    -- This join will join across to each row of data above
    -- but can also reference data from the outer query
    cross join lateral (
        select
            sum(population) as neighbor_sum
        from
            nyc_zips
        where
            st_intersects(geom, a.geom)
            and a.zipcode != zipcode
    ) z

8 - Spatial relationships

8.2 Ways to use spatial relationship functions

8.1

select
    ogc_fid,
    trip_distance,
    total_amount,
    
    -- ST_Intersects will return a boolean in a column
    st_intersects(

        -- First geometry is at the pickup location using the buildpoint function
        buildpoint(
            pickup_longitude :: numeric,
            pickup_latitude :: numeric,
            4326
        ),

        -- This selects the geometry for the West Village
        (
            select
                geom
            from
                nyc_neighborhoods
            where
                neighborhood = 'West Village'
        )
    )
from
    nyc_yellow_taxi_0601_0615_2016
order by
    pickup_datetime asc
limit
    10

8.2

select
    ogc_fid,
    trip_distance,
    total_amount
from
    nyc_yellow_taxi_0601_0615_2016
where

    -- Using ST_Intersects in the WHERE clause
    st_intersects(

        -- Using ST_Intersects in the WHERE clause, first with the pick up point
        buildpoint(
            pickup_longitude :: numeric,
            pickup_latitude :: numeric,
            4326
        ),

        -- Selecting the geometry for the West Village
        (
            select
                geom
            from
                nyc_neighborhoods
            where
                neighborhood = 'West Village'
        )
    )
order by
    pickup_datetime asc
limit
    10

8.3

alter table
    nyc_yellow_taxi_0601_0615_2016
add
    column pickup geometry,
add
    column dropoff geometry

8.4

update
    nyc_yellow_taxi_0601_0615_2016
set
    pickup = st_setsrid(
        st_makepoint(pickup_longitude, pickup_latitude),
        4326
    ),
    dropoff = st_setsrid(
        st_makepoint(pickup_longitude, pickup_latitude),
        4326
    )

8.5

select
    ogc_fid,
    trip_distance,
    total_amount
from
    nyc_yellow_taxi_0601_0615_2016
where
    st_intersects(
        pickup,
        (
            select
                geom
            from
                nyc_neighborhoods
            where
                neighborhood = 'West Village'
        )
    )
order by
    pickup_datetime asc
limit
    10

8.6

select
    a.ogc_fid,
    a.trip_distance,
    a.total_amount,
    st_intersects(a.pickup, b.geom)
from
    nyc_yellow_taxi_0601_0615_2016 a,
    nyc_neighborhoods b
where
    b.neighborhood = 'West Village'
order by
    a.pickup_datetime asc
limit
    100

8.7

select
    a.ogc_fid,
    a.trip_distance,
    a.total_amount
from
    nyc_yellow_taxi_0601_0615_2016 a

    -- Since ST_Intersects will return true or false
    -- we can use it to evaluate the join
    join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
    b.neighborhood = 'West Village'
order by
    a.pickup_datetime asc
limit
    10

8.8

select
    a.ogc_fid,
    a.trip_distance,
    a.total_amount,
    b.neighborhood
from
    nyc_yellow_taxi_0601_0615_2016 a
    join nyc_neighborhoods b on st_intersects(a.pickup, b.geom)
where
    b.neighborhood = 'West Village'
order by
    a.pickup_datetime asc
limit
    10

8.9

select

    -- Here we can aggregate other data that all falls within our 
    -- joined geometry data
    a.neighborhood,
    sum(b.total_amount) as sum_amount,
    avg(b.total_amount) as avg_amount
from
    nyc_neighborhoods a
    join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
where 
	a.neighborhood = 'West Village'
group by
    a.neighborhood
limit
    5

8.10

with a as (
    select
        a.neighborhood,
        sum(b.total_amount) as sum_amount
    from
        nyc_neighborhoods a
        join nyc_yellow_taxi_0601_0615_2016 b on st_intersects(b.pickup, a.geom)
    where
        a.neighborhood = 'West Village'
    group by
        a.neighborhood
)
select
    a.sum_amount,
    b.*
from
    a
    join nyc_neighborhoods b using (neighborhood)

8.11

select
    zipcode,
    population,
    z.neighbor_sum
from
    nyc_zips a

    -- This join will join across to each row of data above
    -- but can also reference data from the outer query
    cross join lateral (
        select
            sum(population) as neighbor_sum
        from
            nyc_zips
        where
            st_intersects(geom, a.geom)
            and a.zipcode != zipcode
    ) z

8.3 Spatial relationship functions

8.12

select
    name,
    geom
from
    nyc_building_footprints
where
    st_contains(
        (
            select
                st_buffer(
                    buildpoint(-73.993584, 40.750580, 4326) :: geography,
                    200
                ) :: geometry
        ),
        geom
    )

8.13

select
    a.ogc_fid,
    a.name,
    st_centroid(st_transform(a.geom, 4326)),
    b.ogc_fid as b_id,
    st_transform(b.geom, 4326)
from
    nyc_building_footprints a
    join nyc_bike_routes b on st_crosses(a.geom, b.geom)

8.14

select
    name,
    geom
from
    nyc_building_footprints
where
    st_disjoint(
        (
            select
                st_buffer(
                    buildpoint(-74.002222, 40.733889, 4326) :: geography,
                    200
                ) :: geometry
        ),
        geom
    )
order by
    st_distance(
        (
            select
                st_buffer(
                    buildpoint(-74.002222, 40.733889, 4326) :: geography,
                    200
                ) :: geometry
        ),
        geom
    ) asc
limit
    200

8.15

select
    st_geomfromtext(
        'GEOMETRYCOLLECTION(LINESTRING(0 0, 3 3), POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)))'
    ) as geom,
    st_overlaps(
        st_geomfromtext('LINESTRING(0 0, 0 3)'),
        st_geomfromtext('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
    )

8.16

select
    st_geomfromtext(
        'GEOMETRYCOLLECTION(POLYGON((1 0, 1 1, 1 2, 0 2, 1 0)), POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)))'
    ) as geom,
    st_overlaps(
        st_geomfromtext('POLYGON((1 0, 1 1, 1 2, 0 2, 1 0))'),
        st_geomfromtext('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
    )

8.17

select
    st_geomfromtext(
        'GEOMETRYCOLLECTION(LINESTRING(1 0, 1 1, 0 1, 0 0), LINESTRING(2 1, 1 1, 0 1, 1 2))'
    ) as geom,
    st_overlaps(
        st_geomfromtext('LINESTRING(1 0, 1 1, 0 1, 0 0)'),
        st_geomfromtext('LINESTRING(2 1, 1 1, 0 1, 1 2)')
    )

8.18

select
    *
from
    nyc_zips
where
    st_touches(
        geom,
        (
            select
                geom
            from
                nyc_zips
            where
                zipcode = '10009'
        )
    )

8.19

select
    name,
    geom
from
    nyc_building_footprints
where
    st_within(
        geom,
        (
            select
                st_buffer(
                    buildpoint(-74.002222, 40.733889, 4326) :: geography,
                    200
                ) :: geometry
        )
    )

8.4 Distance Relationship Functions

8.20

select
    name,
    geom
from
    nyc_building_footprints
where
    st_intersects(
        geom,
        (
            select
                st_buffer(
                    buildpoint(-74.002222, 40.733889, 4326) :: geography,
                    200
                ) :: geometry
        )
    )

8.21

select
    name,
    geom
from
    nyc_building_footprints
where
    st_intersects(
        geom,
        (
            select
                st_buffer(
                    buildpoint(-74.002222, 40.733889, 4326) :: geography,
                    10000
                ) :: geometry
        )
    )

8.22

select
    name,
    geom
from
    nyc_building_footprints
where
    st_dwithin(
        st_transform(geom, 3857),
        buildpoint(-74.002222, 40.733889, 3857),
        200
    )

8.23

select
    name,
    geom
from
    nyc_building_footprints
where
    st_dwithin(
        st_transform(geom, 3857),
        st_transform(
            st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
            3857
        ),
        200
    )

8.24

alter table
    nyc_building_footprints
add
    column geom_3857 geometry;

update
    nyc_building_footprints
set
    geom_3857 = st_transform(geom, 3857);

8.25

select
    name,
    geom
from
    nyc_building_footprints
where
    st_dwithin(
        geom_3857,
        st_transform(
            st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
            3857
        ),
        200
    )

8.26

with a as (
    select
        st_transform(
            st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
            3857
        ) as geo
)
select
    name,
    geom
from
    nyc_building_footprints,
    a
where
    st_dwithin(geom_3857, a.geo, 200)

8.27

create index geom_3857_idx on 
nyc_building_footprints using gist(geom_3857)

8.28

with a as (
    select
        st_transform(
            st_setsrid(st_makepoint(-74.002222, 40.733889), 4326),
            3857
        ) as geo
)
select
    name,
    geom
from
    nyc_building_footprints,
    a
where
    st_dwithin(
        geom_3857,
        (
            select
                geo
            from
                a
        ),
        10000
    )

8.5 Spatial Joins

8.29

select
    a.ogc_fid,
    a.health,
    a.spc_common,
    b.neighborhood
from
    nyc_2015_tree_census a,
    nyc_neighborhoods b
where
    st_intersects(a.geom, b.geom)
    and a.spc_common ilike '%maple%'

8.30

select
    a.ogc_fid,
    a.health,
    a.spc_common,
    b.neighborhood
from
    nyc_2015_tree_census a
    join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
    and a.spc_common ilike '%maple%'

8.31

create table nyc_2010_neighborhoods_subdivide as
select
    st_subdivide(geom) as geom,
    neighborhood
from
    nyc_neighborhoods

8.32

with trees as (
        select
            ogc_fid,
            health,
            spc_common,
            geom
        from
            nyc_2015_tree_census
        where
            spc_common ilike '%maple%'
    )
select
    trees.ogc_fid,
    trees.health,
    trees.spc_common,
    b.neighborhood
from
    trees
    join nyc_neighborhoods_subdivide b on st_intersects(trees.geom, b.geom)

8.33

create index nyc_neighborhoods_subdivide_geom_idx 
n nyc_neighborhoods_subdivide using gist(geom)

8.34

cluster nyc_neighborhoods_subdivide using nyc_neighborhoods_subdivide_geom_idx;

cluster nyc_2015_tree_census using nyc_2015_tree_census_geom_geom_idx;

8.35

select
    count(a.ogc_fid) filter (
        where
            a.spc_common ilike '%maple%'
    ) :: numeric / count(a.ogc_fid) :: numeric as percent_trees,
    count(a.ogc_fid) filter (
        where
            a.spc_common ilike '%maple%'
    ) as count_maples,
    b.neighborhood
from
    nyc_2015_tree_census a
    join nyc_neighborhoods_subdivide b on st_intersects(a.geom, b.geom)
group by
    b.neighborhood

8.6 Overlay Functions

8.36

select
    st_difference(a.geom, st_transform(b.geom, 4326)) as geom
from
    nyc_neighborhoods a,
    nyc_zips b
where
    b.zipcode = '10014'
    and a.neighborhood = 'West Village'

8.37

select
    st_intersection(a.geom, st_transform(b.geom, 4326)) as geom
from
    nyc_neighborhoods a,
    nyc_zips b
where
    b.zipcode = '10003'
    and a.neighborhood = 'Gramercy'

8.38

select
    st_intersection(a.geom, st_transform(b.geom, 4326)) as geom
from
    nyc_neighborhoods a,
    nyc_zips b
where
    b.zipcode = '10003'
    and a.neighborhood = 'Gramercy'
union
select
    geom
from
    nyc_neighborhoods
where
    neighborhood = 'Gramercy'
union
select
    st_transform(geom, 4326)
from
    nyc_zips
where
    zipcode = '10003'

8.39

-- Query all the street segments between W 39 ST and BANJ ST
with a as (
    select
        st_union(geom) as geom
    from
        nyc_bike_routes
    where
        fromstreet IN ('W HOUSTON ST', 'BANK ST')
        or tostreet IN ('BANK ST', 'W 39 ST', 'W HOUSTON ST')
)
select
    st_split(
        -- Select the geometgry for the West Village
        (
            select
                st_transform(geom, 4326)
            from
                nyc_neighborhoods
            where
                neighborhood = 'West Village'
        ),
        
        -- Split it with our geometry in our CTE above
        (
            select
                geom
            from
                a
        )
    )

8.40

select
    st_subdivide(st_transform(geom, 4326), 50) as geom
from
    nyc_neighborhoods
where
    neighborhood = 'West Village'

8.41

select
    st_union(geom) as geom
from
    nyc_neighborhoods
where
    st_intersects(
        geom,
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10009'
        )
    )

8.7 Cluster Functions

8.42

alter table
    nyc_311
add
    column geom geometry;

update
    nyc_311
set
    geom = buildpoint(longitude, latitude, 4326);

8.43

create
or replace view nyc_311_dbscan_noise as

-- Create the clustering functon using the Window funciton syntax
select
    id,
    ST_ClusterDBSCAN(st_transform(geom, 3857), 30, 30) over () AS cid,
    geom
from
    nyc_311
where

    -- Find just the complaints with "noise" in the description
    -- and that are in zip code 10009
    st_intersects(
        geom,
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10009'
        )
    )
    and complaint_type ilike '%noise%'

8.44

create
or replace view nyc_311_kmeans_noise as

-- Create the clustering functon using the Window funciton syntax
select
    id,
    ST_ClusterKMeans(st_transform(geom, 3857), 7, 1609) over () AS cid,
    geom
from
    nyc_311
where

    -- Find just the complaints with "noise" in the description
    -- and that are in zip code 10009
    st_intersects(
        geom,
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10009'
        )
    )
    and complaint_type ilike '%noise%'

8.45

create table nyc_311_clusterwithin_noise as 

-- Find the 311 calls in the 10009 zip code
with a as (
    select
        geom
    from
        nyc_311
    where
        st_intersects(
            geom,
            (
                select
                    st_transform(geom, 4326)
                from
                    nyc_zips
                where
                    zipcode = '10009'
            )
        )
        and complaint_type ilike '%noise%'
),

-- In CTE "b", we have to unnest the results since it returns
-- geometries in an array
b as (
    select
        st_transform(
            unnest(ST_ClusterWithin(st_transform(geom, 3857), 25)),
            4326
        ) AS geom
    from
        a
)

-- row_number() over() creates an id for each row starting at 1
select
    row_number() over() as id,
    geom
from
    b

8.8 Special Operators

8.46

select
    zipcode,
    st_transform(geom, 4326)
from
    nyc_zips
where
    -- Finds all of the zip codes that intersect the bounding box
    -- of the East Village
    st_transform(geom, 4326) && (
        select
            geom
        from
            nyc_2010_neighborhoods
        where
            ntaname = 'East Village'
    )
UNION

-- Query to show the East Village bounding box on the map using ST_Envelope
select
    'None' as zipcode,
    st_envelope(
        (
            select
                geom
            from
                nyc_2010_neighborhoods
            where
                ntaname = 'East Village'
        )
    )

8.47

select
    geom
from
    nyc_neighborhoods
where
    geom &< (
        select
            geom
        from
            nyc_neighborhoods
        where
            neighborhood = 'East Village'
    )

8.48

with ev as (
    select
        geom
    from
        nyc_neighborhoods
    where
        neighborhood = 'East Village'
),
ues as (
    select
        geom
    from
        nyc_neighborhoods
    where
        neighborhood = 'Upper East Side'
)
select
    ev.geom <-> ues.geom,
    st_distance(ev.geom, use.geom)
from
    ev,
    ues

8.49

with ev as (
    select
        geom :: geography
    from
        nyc_neighborhoods
    where
        neighborhood = 'East Village'
),
ues as (
    select
        geom :: geography
    from
        nyc_neighborhoods
    where
        neighborhood = 'Upper East Side'
)
select
    ev.geom <-> ues.geom as new_operator,
    st_distance(ev.geom, ues.geom)
from
    ev,
    ues

9 - Spatial Analysis

9.1 Analyses we have already seen

9.1

select
    spc_common,
    case
        when spc_common ilike '%oak%' then 'Oak'
        when spc_common ilike '%maple%' then 'Maple'
        when spc_common ilike '%pine%' then 'Pine'
        else NULL
    end as tree_type
from
    nyc_2015_tree_census
limit
    100

9.2

create temporary table stadiums_matrix as with stadiums as (
    select
        'Citi Field' as stadium,
        buildpoint(-73.845833, 40.756944, 4326) as geom
    union
    select
        'Yankees Stadium' as stadium,
) buildpoint(-73.926389, 40.829167, 4326) as geom
select
    a.stadium,
    b.neighborhood,
    st_distance(st_centroid(b.geom), a.geom)
from
    stadiums a,
    nyc_neighborhoods b

9.3

-- Find all the rows from the stadiums matrix for Citi Field
with mets as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Citi Field'
),

-- Find all the rows from the stadiums matrix for Yankees Stadium
yankees as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Yankees Stadium'
)

select
    a.neighborhood,
    b.st_distance as mets,
    c.st_distance as yankees
from
    nyc_neighborhoods a
    join mets b using (neighborhood)
    join yankees c using (neighborhood)

9.4

with mets as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Citi Field'
),
yankees as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Yankees Stadium'
)
select
    a.neighborhood,
    a.geom,
    b.st_distance as mets,
    c.st_distance as yankees
from
    nyc_neighborhoods a
    join mets b using (neighborhood)
    join yankees c using (ntaname)
where
    c.st_distance < b.st_distance

9.5

with mets as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Citi Field'
),
yankees as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Yankees Stadium'
)
select
    a.ntaname,
    a.geom,
    b.st_distance as mets,
    c.st_distance as yankees
from
    nyc_neighborhoods a
    join mets b using (neighborhood)
    join yankees c using (neighborhood)
where
    b.st_distance < c.st_distance

9.6

select
    st_makeenvelope(-74.047196, 40.679654, -73.906769, 40.882012, 4326)

9.7

select
    st_envelope(st_transform(geom, 4326)) as geom
from
    nyc_zips
where
    zipcode = '11231'

9.8

select
    st_collect(st_transform(geom, 4326)) as geom
from
    nyc_zips
where
    zipcode = '11231'

9.9

select
    *
from
    nyc_bike_routes
order by
    st_length(geom) desc
limit
    1

9.10

select
    st_lineinterpolatepoint(st_linemerge(geom), 0.5) as geom
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.11

select
    st_length(st_transform(geom, 3857))
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.12

select
    st_lineinterpolatepoint(
        -- This is the line created from ST_LineMerge
        st_linemerge(geom),

        -- Here we divide 500 by the total length of the route
        (
            500 / (
                select
                    st_length(st_transform(geom, 3857))
                from
                    nyc_bike_routes
                where
                    ogc_fid = 20667
            )
        )
    ) as geom
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.13

select
    st_lineinterpolatepoints(

        -- Our merged geometry
        st_linemerge(geom),

        -- Dividing the length of the route by 75
        (
            75 / (
                select
                    st_length(st_transform(geom, 3857))
                from
                    nyc_bike_routes
                where
                    ogc_fid = 20667
            )
        )
    ) as geom
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.14

select
    st_intersection(
        st_transform(geom, 4326),
        st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
    ) as geom
from
    nyc_zips

9.15

select
    st_difference(
        st_transform(geom, 4326),
        st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
    ) as geom
from
    nyc_zips

9 - Spatial Analysis

9.1 Analyses we have already seen

9.1

select
    spc_common,
    case
        when spc_common ilike '%oak%' then 'Oak'
        when spc_common ilike '%maple%' then 'Maple'
        when spc_common ilike '%pine%' then 'Pine'
        else NULL
    end as tree_type
from
    nyc_2015_tree_census
limit
    100

9.2

create temporary table stadiums_matrix as with stadiums as (
    select
        'Citi Field' as stadium,
        buildpoint(-73.845833, 40.756944, 4326) as geom
    union
    select
        'Yankees Stadium' as stadium,
) buildpoint(-73.926389, 40.829167, 4326) as geom
select
    a.stadium,
    b.neighborhood,
    st_distance(st_centroid(b.geom), a.geom)
from
    stadiums a,
    nyc_neighborhoods b

9.3

-- Find all the rows from the stadiums matrix for Citi Field
with mets as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Citi Field'
),

-- Find all the rows from the stadiums matrix for Yankees Stadium
yankees as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Yankees Stadium'
)

select
    a.neighborhood,
    b.st_distance as mets,
    c.st_distance as yankees
from
    nyc_neighborhoods a
    join mets b using (neighborhood)
    join yankees c using (neighborhood)

9.4

with mets as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Citi Field'
),
yankees as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Yankees Stadium'
)
select
    a.neighborhood,
    a.geom,
    b.st_distance as mets,
    c.st_distance as yankees
from
    nyc_neighborhoods a
    join mets b using (neighborhood)
    join yankees c using (ntaname)
where
    c.st_distance < b.st_distance

9.5

with mets as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Citi Field'
),
yankees as (
    select
        *
    from
        stadiums_matrix
    where
        stadium = 'Yankees Stadium'
)
select
    a.ntaname,
    a.geom,
    b.st_distance as mets,
    c.st_distance as yankees
from
    nyc_neighborhoods a
    join mets b using (neighborhood)
    join yankees c using (neighborhood)
where
    b.st_distance < c.st_distance

9.6

select
    st_makeenvelope(-74.047196, 40.679654, -73.906769, 40.882012, 4326)

9.7

select
    st_envelope(st_transform(geom, 4326)) as geom
from
    nyc_zips
where
    zipcode = '11231'

9.8

select
    st_collect(st_transform(geom, 4326)) as geom
from
    nyc_zips
where
    zipcode = '11231'

9.9

select
    *
from
    nyc_bike_routes
order by
    st_length(geom) desc
limit
    1

9.10

select
    st_lineinterpolatepoint(st_linemerge(geom), 0.5) as geom
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.11

select
    st_length(st_transform(geom, 3857))
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.12

select
    st_lineinterpolatepoint(
        -- This is the line created from ST_LineMerge
        st_linemerge(geom),

        -- Here we divide 500 by the total length of the route
        (
            500 / (
                select
                    st_length(st_transform(geom, 3857))
                from
                    nyc_bike_routes
                where
                    ogc_fid = 20667
            )
        )
    ) as geom
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.13

select
    st_lineinterpolatepoints(

        -- Our merged geometry
        st_linemerge(geom),

        -- Dividing the length of the route by 75
        (
            75 / (
                select
                    st_length(st_transform(geom, 3857))
                from
                    nyc_bike_routes
                where
                    ogc_fid = 20667
            )
        )
    ) as geom
from
    nyc_bike_routes
where
    ogc_fid = 20667

9.14

select
    st_intersection(
        st_transform(geom, 4326),
        st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
    ) as geom
from
    nyc_zips

9.15

select
    st_difference(
        st_transform(geom, 4326),
        st_makeenvelope(-73.981667, 40.76461, -73.949314, 40.800368, 4326)
    ) as geom
from
    nyc_zips

9.2 New analyses

9.16

select
    st_generatepoints(st_transform(geom, 4326), 50) as geom
from
    nyc_zips
where
    zipcode = '10001'

9.17

select
    (
        st_dump(
            st_generatepoints(st_transform(geom, 4326), 5000)
        )
    ).geom as geom
from
    nyc_zips
where
    zipcode = '10001'

9.18

select
    st_transform(geom, 4326) as geom
from
    nyc_zips
where
    zipcode = '11101'

9.19

select
    (
        st_dump(
            st_generatepoints(st_transform(geom, 4326), 5000)
        )
    ).geom as geom
from
    nyc_zips
where
    zipcode = '11101'

9.20

-- The "points" CTE generates 5,000 random points in the 11101 postal code
with points as (
    select
        (
            st_dump(
                st_generatepoints(st_transform(geom, 4326), 5000)
            )
        ).geom as geom
    from
        nyc_zips
    where
        zipcode = '11101'
),

-- Create 6 even clusters using ST_ClusterKMeans
cluster as (
    select
        geom,
        st_clusterkmeans(geom, 6) over () AS cluster
    from
        points
)

-- Group or collect each cluster and find the centroid
select
    st_centroid(
        st_collect(geom)
    ) as geom,
    cluster
from
    cluster
group by
    cluster

9.21

with points as (
    select
        (
            st_dump(
                st_generatepoints(st_transform(geom, 4326), 5000)
            )
        ).geom as geom
    from
        nyc_zips
    where
        zipcode = '11101'
),
cluster as (
    select
        geom,
        st_clusterkmeans(geom, 6) over () AS cluster
    from
        points
),
centroid as (
    select
        st_centroid(st_collect(geom)) as geom,
        cluster
    from
        cluster
    group by
        cluster
)

-- In this step we collect the centroids
-- then create Voronoi polygons, then extract
-- each individual polygon
select
    (
        st_dump(st_voronoipolygons(st_collect(geom)))
    ).geom AS geom
from
    centroid

9.22

with points as (
    select
        (
            st_dump(
                st_generatepoints(st_transform(geom, 4326), 5000)
            )
        ).geom as geom
    from
        nyc_zips
    where
        zipcode = '11101'
),
cluster as (
    select
        geom,
        st_clusterkmeans(geom, 6) over () AS cluster
    from
        points
),
centroid as (
    select
        st_centroid(st_collect(geom)) as geom,
        cluster
    from
        cluster
    group by
        cluster
),
voronoi as (
    select
        (
            st_dump(st_voronoipolygons(st_collect(geom)))
        ).geom AS geom
    from
        centroid
)

-- In the last step, we find the intersection (or clip)
-- the 11101 zip code and the Voronoi polygons
select
    st_intersection(
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '11101'
        ),
        geom
    )
from
    voronoi

9.23

-- Setting up our function and inputs
create
or replace function st_equalarea(
    seed_points int,
    tablename text,
    polygons int,
    unique_id text
) 

-- Define that the function returns a table and the return values

returns table (id varchar, geom geometry) language plpgsql 
as $$ 
begin 
return query 

-- This will run the string as a query, filling in the values
execute format(
    'with points as (select %s as id, 
    (st_dump( st_generatepoints( geom , %s) )).geom as geom from %s), 
    cluster as (select geom, id, st_clusterkmeans(geom, %s) 
    over (partition by id) AS cluster from points), 
    centroid as (select id, st_centroid(st_collect(geom)) as geom, 
    cluster from cluster group by cluster, id), 
    voronoi as (select id, (st_dump( st_voronoipolygons( st_collect(geom) ) )).geom 
    AS voronoi_geom from centroid group by id) 
    select b.id, st_intersection(a.geom, b.voronoi_geom) as 
    geom from voronoi b join %s a on b.id = %s;',

    -- These values will replace each instance of %s in the order they appear
    unique_id,
    seed_points,
    tablename,
    polygons,
    tablename,
    unique_id
);

end;
$$

9.24

select
    *
from
    st_equalarea(1000, 'nyc_zips', 6, 'zipcode')

9.25

select
    id,
    st_transform(geom, 4326) as geom
from
    st_equalarea(1000, 'nyc_zips', 6, 'zipcode')

9.26

create
or replace function st_equalareawithinput(
    seed_points int,
    tablename text,
    polygons int,
    unique_id text
) returns table (id varchar, geom geometry) language plpgsql as $$ begin return query execute format(
    'with points as (select %s as id, (st_dump( st_generatepoints( geom , %s) )).geom 
    as geom from %s), 
    area as (select st_area(geom) as area, zipcode from %s), 
    cluster as (select points.geom, points.id, 
    st_clusterkmeans(points.geom, ceil((a.area/(%s)))::int) over (partition by id) AS cluster 
    from points left join area a on a.zipcode = points.id), 
    centroid as (select id, st_centroid(st_collect(geom)) as geom, 
    cluster from cluster group by cluster, id), 
    voronoi as (select id,(st_dump( st_voronoipolygons( st_collect(geom) ) )).geom AS voronoi_geom 
    from centroid group by id) select b.id, st_intersection(a.geom, b.voronoi_geom) as geom 
    from voronoi b join %s a on b.id = %s;',
    unique_id,
    seed_points,
    tablename,
    tablename,
    polygons,
    tablename,
    unique_id
);

end;

$$

9.29

create table nyc_zips_50_acres as
select
    id,
    st_transform(geom, 4326) as geom
from
    st_equalareawithinput(1000, 'nyc_zips', (43560 * 50), 'zipcode')

9.30

-- Create a point at the Plaza Hotel
with point as (
    select
        buildpoint(-73.9730423, 40.7642784, 4326) as geom
),

-- Find all the pickups within 50 meters of the Plaza
-- on June 1st, 2016 and calculate distance with a cross
-- join. Since it is just one point the cross join is acceptable
-- since it is just one row in that table.
start as (
    select
        *
    from
        nyc_yellow_taxi_0601_0615_2016,
        point
    where
        st_dwithin(pickup :: geography, point.geom :: geography, 50)
        and pickup_datetime :: date = '06-01-2016'
)
select
    ogc_fid,
    trip_distance,
    total_amount,
    
    -- Create a line from the Plaza (in the subquery in the second argument)
    -- and all the dropoff locations
    st_makeline(
        dropoff,
        (
            select
                geom
            from
                point
        )
    ) as geom
from
    start

9.31

select
    a.id,
    st_makeline(a.geom, b.geom) as geom
from
    destinations a
    left join origins b using (id)

9.32

select
    geom,
    health,
    spc_common
from
    nyc_2015_tree_census
order by
    st_distance(buildpoint(-73.977733, 40.752273, 4326), geom)
limit
    3

9.33

select
    geom,
    health,
    spc_common
from
    nyc_2015_tree_census
order by
    buildpoint(-73.977733, 40.752273, 4326) <-> geom
limit
    3

9.34

-- Create a point at John's of Bleeker Street
with point as (
    select
        buildpoint(-74.003544, 40.7316243, 4326) :: geography as geog
),

-- Find all the buildings that are within 1 kilometer of John's
buildings as (
    select
        geom,
        name,
        mpluto_bbl
    from
        nyc_building_footprints
    where
        st_dwithin(
            geom::geography,
            (
                select
                    geog
                from
                    point
            ),
            1000
        )
)

-- Selects three columns from the "buildings" CTE
-- And one from the cross join lateral
select
    geom,
    name,
    mpluto_bbl,
    nearest.distance
from
    buildings

    -- This join selects the distance to the nearest fire hydrant laterally
    -- or for each row in the "buildings" dataset. As you can see it uses
    -- columns from the outside query, and limits the results to 1
    cross join lateral (
        select
            unitid,
            st_distance(geom :: geography, buildings.geom :: geography) as distance
        from
            nyc_fire_hydrants
        order by
            geom <-> buildings.geom
        limit
            1
    ) nearest

9.35

create table nearest_hydrant_pizza as with point as (
	select
		buildpoint(-74.003544, 40.7316243, 4326) :: geography as geog
),
buildings as (
	select
		geom,
		name,
		mpluto_bbl
	from
		nyc_building_footprints
	where
		st_dwithin(
			geom :: geography,
			(
				select
					geog
				from
					point
			),
			1000
		)
)
select
	geom,
	name,
	mpluto_bbl,
	nearest.distance
from
	buildings
	cross join lateral (
		select
			unitid,
			st_distance(geom :: geography, buildings.geom :: geography) as distance
		from
			nyc_fire_hydrants
		order by
			geom <-> buildings.geom
		limit
			1
	) nearest

Import road centerlines

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \ dbname=gis port=25432" \
street_centerlines.geojson \
-nln nyc_street_centerlines -lco GEOMETRY_NAME=geom

9.36

select
    *
from
    nyc_yellow_taxi_0601_0615_2016
where
    st_dwithin(
        pickup :: geography,
        buildpoint(-73.987224, 40.733342, 4326) :: geography,
        300
    )
    and pickup_datetime between '2016-06-02 9:00:00+00'
    and '2016-06-02 18:00:00+00'

9.37

-- Select all pickups within 300 meters of Union Square
-- between 9am and 6pm on June 2nd, 2016
with pickups as (
    select
        pickup,
        tip_amount,
        total_amount
    from
        nyc_yellow_taxi_0601_0615_2016
    where
        st_dwithin(
            pickup :: geography,
            buildpoint(-73.987224, 40.733342, 4326) :: geography,
            300
        )
        and pickup_datetime between '2016-06-02 9:00:00+00'
        and '2016-06-02 18:00:00+00'
)

-- Find the nearest road to each point using a 
-- cross join lateral
select
    a.*,
    street.name,
    street.ogc_fid,
    street.geom
from
    pickups a
    cross join lateral (
        select
            ogc_fid,
            full_stree as name,
            geom
        from
            nyc_street_centerlines
        order by
            a.pickup <-> geom
        limit
            1
    ) street

9.38

with pickups as (
    select
        pickup,
        tip_amount,
        total_amount
    from
        nyc_yellow_taxi_0601_0615_2016
    where
        st_dwithin(
            pickup :: geography,
            buildpoint(-73.987224, 40.733342, 4326) :: geography,
            300
        )
        and pickup_datetime between '2016-06-02 9:00:00+00'
        and '2016-06-02 18:00:00+00'
),
nearest as (
    select
        a.*,
        street.name,
        street.ogc_fid,
        street.geom
    from
        pickups a
        cross join lateral (
            select
                ogc_fid,
                full_stree as name,
                geom
            from
                nyc_street_centerlines
            order by
                a.pickup <-> geom
            limit
                1
        ) street
)
select
    a.*,

    -- Create a line between the original point and the new snapped point
    st_makeline(
        pickup,
        st_lineinterpolatepoint(
            st_linemerge(b.geom),
            st_linelocatepoint(st_linemerge(b.geom), pickup)
        )
    ) as line,

    -- Add a column for the snapped point
    st_lineinterpolatepoint(
        st_linemerge(b.geom),
        st_linelocatepoint(st_linemerge(b.geom), pickup)
    ) as snapped
from
    nearest a
    join nyc_street_centerlines b using (ogc_fid)

9.39

create
or replace view nyc_taxi_union_square as with pickups as (
    select
        pickup,
        tip_amount,
        total_amount
    from
        nyc_yellow_taxi_0601_0615_2016
    where
        st_dwithin(
            pickup :: geography,
            buildpoint(-73.987224, 40.733342, 4326) :: geography,
            300
        )
        and pickup_datetime between '2016-06-02 9:00:00+00'
        and '2016-06-02 18:00:00+00'
),
nearest as (
    select
        a.*,
        street.name,
        street.ogc_fid,
        street.geom
    from
        pickups a
        cross join lateral (
            select
                ogc_fid,
                full_stree as name,
                geom
            from
                nyc_street_centerlines
            order by
                a.pickup <-> geom
            limit
                1
        ) street
)
select
    a.*,
    st_makeline(
        pickup,
        st_lineinterpolatepoint(
            st_linemerge(b.geom),
            st_linelocatepoint(st_linemerge(b.geom), pickup)
        )
    ) as line,
    st_lineinterpolatepoint(
        st_linemerge(b.geom),
        st_linelocatepoint(st_linemerge(b.geom), pickup)
    ) as snapped
from
    nearest a
    join nyc_street_centerlines b using (ogc_fid)

9.40

create table nyc_taxi_union_square_snapped as
select
    snapped,
    name,
    ogc_fid
from
    nyc_taxi_union_square;
    
create table nyc_taxi_union_square_roads as
select
    geom,
    name,
    ogc_fid
from
    nyc_taxi_union_square; 
    
create table nyc_taxi_union_square_points as
select
    pickup,
    name,
    ogc_fid
from
    nyc_taxi_union_square; 
    
create table nyc_taxi_union_square_lines as
select
    lines,
    name,
    ogc_fid
from
    nyc_taxi_union_square;

9.41

-- Calculate the tip percentage for each trip with a total over $0
with a as (
    select
        *,
        tip_amount /(total_amount - tip_amount) as tip_percent
    from
        nyc_taxi_union_square
    where
        total_amount > 0
)


-- Get the average tip percentage for each road segment
select
    avg(a.tip_percent),
    b.geom,
    b.ogc_fid
from
    a
    join nyc_street_centerlines b using (ogc_fid)
group by
    b.geom,
    b.ogc_fid

9.42

create table nyc_taxi_union_square_tips as with a as (
    select
        *,
        tip_amount / total_amount as tip_percent
    from
        nyc_taxi_union_square
    where
        total_amount > 0
)
select
    avg(a.tip_percent),
    b.geom,
    b.ogc_fid
from
    a
    join nyc_street_centerlines b using (ogc_fid)
group by
    b.geom,
    b.ogc_fid

9.43

select
    mode() within group (
        ORDER BY
            a.spc_common
    ) as most_common
from
    nyc_2015_tree_census a
    join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
where
    b.neighborhood = 'East Village'

9.44

select
    count(a.*),
    a.spc_common
from
    nyc_2015_tree_census a
    join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
where
    b.neighborhood = 'East Village'
group by
    a.spc_common
order by
    count(a.*) desc

9.45

with building as (
    select
        geom
    from
        nyc_building_footprints
    where
        name = 'Empire State Building'
)
select
    st_shortestline(h.geom, b.geom)
from
    nyc_fire_hydrants h,
    building b
order by
    h.geom <-> b.geom asc
limit
    100

9.46

select
    array(
        select
            geom
        from
            nyc_311
        where
            geom is not null
        limit
            10
    )

9.47

select
    st_makeline(
        array(
            select
                geom
            from
                nyc_311
            where
                geom is not null
            order by
                id
            limit
                10
        )
    )

9.48

select

    -- Clips the 100 meter buffer out of the 10001 zip code
    st_difference(
        st_transform(geom, 4326),

        -- Creates a 100 meter buffer
        st_buffer(
            buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
            100
        ) :: geometry
    )
from
    nyc_zips
where
    zipcode = '10001'

9.49

with a as (
    select
        10001 as id,
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    id,

    -- Takes only the exterior perimiter of the polygon
    st_exteriorring(geom) as geom
from
    a

9.50

with a as (
    select
        10001 as id,
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10001'
)
select
    id,

    -- Creates a new polygon just from the exterior ring
    -- which removes all holes
    st_makepolygon(
        st_exteriorring(geom)
    ) as geom
from
    a

9.51

-- Creates a polygon with a hole in zip code 10001
with a as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10001'
),

-- Creates a polygon with a hole in zip code 10017
b as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10017'
),

-- Unions both polygons into a multi-polygon
c as (
    select
        1 as id,
        st_union(a.geom, b.geom)
    from
        a,
        b
)
select
    *
from
    c

9.52

with a as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10001'
),
b as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10017'
),
c as (
    select
        1 as id,
        st_union(a.geom, b.geom) as geom
    from
        a,
        b
)
select
    id,
    (st_dump(geom)).geom
from
    c

9.53

with a as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10001'
),
b as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10017'
),
c as (
    select
        1 as id,
        st_union(a.geom, b.geom) as geom
    from
        a,
        b
)
select
    id,

    -- Takes the exterior ring from the geometry dump
    st_exteriorring(
        (st_dump(geom)).geom
    )
from
    c

9.54

with a as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9936596, 40.7505483, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10001'
),
b as (
    select
        st_difference(
            st_transform(geom, 4326),
            st_buffer(
                buildpoint(-73.9773136, 40.7526559, 4326) :: geography,
                100
            ) :: geometry
        ) as geom
    from
        nyc_zips
    where
        zipcode = '10017'
),
c as (
    select
        1 as id,
        st_union(a.geom, b.geom) as geom
    from
        a,
        b
)
select
    id,

    -- Collect the individual geometries to turn them back into 
    -- one polygon
    st_collect(st_makepolygon(geom)) as geom
from
    (
        select
            id,
            st_exteriorring((st_dump(geom)).geom) as geom
        from
            c
    ) s
group by
    id;

9.55

select
    *
from
    st_maximuminscribedcircle(
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10001'
        )
    )

9.56

select
    center
from
    st_maximuminscribedcircle(
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10001'
        )
    )

9.57

select
    st_buffer(center, radius)
from
    st_maximuminscribedcircle(
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10001'
        )
    )

9.58

select
    st_transform(center, 4326) as geom
from
    st_maximuminscribedcircle(
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10001'
        )
    )
union
select
    st_centroid(st_transform(geom, 4326)) as geom
from
    nyc_zips
where
    zipcode = '10001'

9.3 Lines to polygons, and polygons to lines

9.59

select
    st_boundary(st_transform(geom, 4326))
from
    nyc_zips
where
    zipcode = '10001'

9.60

select
    st_makeline(
        st_pointn(
            st_boundary(st_transform(z.geom, 4326)),
            numbers.num
        ),
        st_pointn(
            st_boundary(st_transform(z.geom, 4326)),
            numbers.num + 1
        )
    ),
    numbers.num
from
    nyc_zips z
    cross join lateral generate_series(1, st_npoints(z.geom) - 1) as numbers(num)
where
    z.zipcode = '10001'

9.61

select
    st_polygonize(
        st_makeline(
            st_pointn(
                st_boundary(st_transform(z.geom, 4326)),
                numbers.num
            ),
            st_pointn(
                st_boundary(st_transform(z.geom, 4326)),
                numbers.num + 1
            )
        )
    )
from
    nyc_zips z
    cross join lateral generate_series(1, st_npoints(z.geom) - 1) as numbers(num)
where
    z.zipcode = '10001'

9.4 Snap points to grid

9.62

select
    st_transform(
        st_snaptogrid(st_transform(geom, 3857), 500, 1000),
        4326
    ) as geom
from
    nyc_311
limit
    100000

9.5 Tessellate triangles

9.63

select
    st_delaunaytriangles(st_transform(geom, 4326))
from
    nyc_zips
where
    zipcode = '10009'

9.64

with a as (
    select
        (
            -- Create a dump to return the individual triangles
            st_dump(

                -- Create the triangles like above
                st_delaunaytriangles(st_transform(geom, 4326))
            )
        ).geom
    from
        nyc_zips
    where
        zipcode = '10009'
)

-- Select and order by areas 
select
    a.geom,
    st_area(geom) as area
from
    a
order by
    st_area(geom) desc
limit
    10

9.6 Tapered buffers

9.65

-- Select all bike routes on Hudson Street
with lines as (
    select
        1 as id,
        st_linemerge(st_union(geom)) as geom
    from
        nyc_bike_routes
    where
        street = 'HUDSON ST'
)
select
    *
from
    lines

9.66

with lines as (
    select
        1 as id,
        ST_LineMerge(st_union(geom)) as geom
    from
        nyc_bike_routes
    where
        street = 'HUDSON ST'
),

-- Dump all of the points and find the length of the geometry
first as (
    select
        id,
        st_dumppoints(geom) as dump,
        st_length(geom) as len,
        geom
    from
        lines
)
select
    *
from
    first

9.67

with lines as (
    select
        1 as id,
        ST_LineMerge(st_union(geom)) as geom
    from
        nyc_bike_routes
    where
        street = 'HUDSON ST'
),
first as (
    select
        id,
        st_dumppoints(geom) as dump,
        st_length(geom) as len,
        geom
    from
        lines
),

-- For each path, select the id, path, and a buffer
-- around the path point. Using ST_LineLocatePoint
-- we use the line geometry and the point to find 
-- the distance along the line, then make it smaller
-- using the log of the length 
second as (
    select
        id,
        (dump).path [1],
        st_buffer(
            (dump).geom,
            ST_LineLocatePoint(geom, (dump).geom) * log(len)
        ) as geom
    from
        first
)
select
    *
from
    second

9.68

with lines as (
    select
        1 as id,
        st_linemerge(st_union(geom)) as geom
    from
        nyc_bike_routes
    where
        street = 'HUDSON ST'
),
first as (
    select
        id,
        st_dumppoints(geom) as dump,
        st_length(geom) as len,
        geom
    from
        lines
),
second as (
    select
        id,
        (dump).path [1],
        st_buffer(
            (dump).geom,
            ST_LineLocatePoint(geom, (dump).geom) * len / 10
        ) as geom
    from
        first
),

-- Create a convex hull around the buffers by union-ing
-- all the buffers together. These are ordered using the 
-- LEAD window function and partition
third as (
    select
        id,
        st_convexhull(
            st_union(
                geom,
                lead(geom) over(
                    partition by id
                    order by
                        id,
                        path
                )
            )
        ) as geom
    from
        second
)
select
    id,
    st_union(geom) as geom
from
    third
group by
    id

9.69

with lines as (
    select
        1 as id,
        st_linemerge(st_union(geom)) as geom
    from
        nyc_bike_routes
    where
        street = 'HUDSON ST'
),
first as (
    select
        id,
        st_dumppoints(geom) as dump,
        st_length(geom) as len,
        geom
    from
        lines
),
second as (
    select
        id,
        (dump).path [1],
        st_transform(
            st_buffer(st_transform((dump).geom, 3857), random() * 100),
            4326
        ) as geom
    from
        first
),
third as (
    select
        id,
        st_convexhull(
            st_union(
                geom,
                lead(geom) over(
                    partition by id
                    order by
                        id,
                        path
                )
            )
        ) as geom
    from
        second
)
select
    id,
    st_union(geom) as geom
from
    third
group by
    id

9.70

select
    st_symdifference(
        (
            select
                geom
            from
                nyc_neighborhoods
            where
                neighborhood = 'NoHo'
        ),
        (
            select
                st_transform(geom, 4326)
            from
                nyc_zips
            where
                zipcode = '10012'
        )
    )

10 - Advanced spatial analytics

10.1 Spatial data enrichment or area weighted interpolation

10.1

ogrinfo ACS_2021_5YR_BG_36_NEW_YORK.gdb ACS_2021_5YR_BG_36_NEW_YORK -geom=YES -so

Other import code

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, b01001e1 as population, B01002e1 as age from X01_AGE_AND_SEX" \
-nln nys_2021_census_block_groups_pop -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \ dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, B19001e1 as income from X19_INCOME" \ 
-nln nys_2021_census_block_groups_income -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select SHAPE as geom, GEOID as geoid from ACS_2021_5YR_BG_36_NEW_YORK" \
-nln nys_2021_census_block_groups_geom -lco GEOMETRY_NAME=geom

10.3

update nyc_neighborhoods set geom = st_makevalid(geom) where st_isvalid(geom) is false

10.4

select
    neighborhood,

    -- These are the values from the cross join lateral
    a.pop,
    a.count,
    a.avg
from
    nyc_neighborhoods
    cross join lateral (
        select

            -- This selects the sum of all the intersecting areas
            -- populations using the proportional overlap calculation
            sum(
                population * (
                    st_area(st_intersection(geom, nyc_neighborhoods.geom)) / st_area(geom)
                )
            ) as pop,
            count(*) as count,

            -- This selects the average area overlapping area
            -- of all the intersecting areas
            avg(
                (
                    st_area(st_intersection(nyc_neighborhoods.geom, geom)) / st_area(geom)
                )
            ) as avg
        from
            nys_2021_census_block_groups
        where
            left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
            and st_intersects(nyc_neighborhoods.geom, geom)
    ) a
order by
    a.pop desc

10 - Advanced spatial analytics

10.1 Spatial data enrichment or area weighted interpolation

10.1

ogrinfo ACS_2021_5YR_BG_36_NEW_YORK.gdb ACS_2021_5YR_BG_36_NEW_YORK -geom=YES -so

Other import code

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, b01001e1 as population, B01002e1 as age from X01_AGE_AND_SEX" \
-nln nys_2021_census_block_groups_pop -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \ dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select GEOID as geoid, B19001e1 as income from X19_INCOME" \ 
-nln nys_2021_census_block_groups_income -lco GEOMETRY_NAME=geom
ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" ACS_2021_5YR_BG_36_NEW_YORK.gdb \
-dialect sqlite -sql "select SHAPE as geom, GEOID as geoid from ACS_2021_5YR_BG_36_NEW_YORK" \
-nln nys_2021_census_block_groups_geom -lco GEOMETRY_NAME=geom

10.3

update nyc_neighborhoods set geom = st_makevalid(geom) where st_isvalid(geom) is false

10.4

select
    neighborhood,

    -- These are the values from the cross join lateral
    a.pop,
    a.count,
    a.avg
from
    nyc_neighborhoods
    cross join lateral (
        select

            -- This selects the sum of all the intersecting areas
            -- populations using the proportional overlap calculation
            sum(
                population * (
                    st_area(st_intersection(geom, nyc_neighborhoods.geom)) / st_area(geom)
                )
            ) as pop,
            count(*) as count,

            -- This selects the average area overlapping area
            -- of all the intersecting areas
            avg(
                (
                    st_area(st_intersection(nyc_neighborhoods.geom, geom)) / st_area(geom)
                )
            ) as avg
        from
            nys_2021_census_block_groups
        where
            left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
            and st_intersects(nyc_neighborhoods.geom, geom)
    ) a
order by
    a.pop desc

10.2 Nearest neighbor in report format

10.5

ogr2ogr \
    - f PostgreSQL PG :"host=localhost user=docker password=docker \ 
    dbname=gis port=25432" Subway_Entrances.geojson \ 
    - nln nyc_subway_enterances - lco GEOMETRY_NAME = geom

10.6

select
    sw.name,
    sw.objectid,
    near.tree_id,
    near.spc_common,
    near.distance
from
    nyc_subway_enterances sw

    -- Since this is a cross join it will join to every possible combination
    -- Since we have a limit of 5 below, it will join it to each row in the
    -- main subway enterances table 5 times
    cross join lateral (
        select
            tree_id,
            spc_common,
            st_distance(sw.geom :: geography, geom :: geography) as distance
        from
            nyc_2015_tree_census
        order by
            sw.geom <-> geom
        limit
            5
    ) near 

10.7

select
    sw.name,
    sw.objectid,
    near.tree_id,
    near.spc_common,
    near.distance,
    near.ranking
from
    nyc_subway_enterances sw
    cross join lateral (
        select
            tree_id,
            spc_common,
            st_distance(sw.geom :: geography, geom :: geography) as distance,
            row_number() over() as ranking
        from
            nyc_2015_tree_census
        order by
            sw.geom <-> geom
        limit
            5
    ) near
limit
    50

10.8

select
    sw.name,
    sw.objectid,
    near.tree_id,
    near.spc_common,
    near.distance,
    rank() over(

        -- The partition will perform the ranking for each 
        -- subway enterance ID for the 5 matching trees linked
        -- to that station ID
        partition by sw.objectid

        -- The ranking will be ordered by the distance
        order by
            distance desc
    )
from
    nyc_subway_enterances sw
    cross join lateral (
        select
            tree_id,
            spc_common,
            st_distance(sw.geom :: geography, geom :: geography) as distance
        from
            nyc_2015_tree_census
        order by
            sw.geom <-> geom
        limit
            5
    ) near
limit
    50

10.3 Flat line of sight

10.9

ogr2ogr \ 
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=gis port=25432" planimetrics_2018_roofprints.json \ 
-nln denver_buildings -lco GEOMETRY_NAME=geom

10.10

select
    geom,
    bldg_heigh,
    ground_ele,
    gid
from
    denver_buildings
order by
    random()
limit
    2

10.11

with a as (
    select
        geom,
        bldg_heigh,
        ground_ele,
        gid
    from
        denver_buildings
    order by
        random()
    limit
        2
)
select

    -- This will create an array for the two buildings above.
    -- One below for the centroid
    array_agg(st_centroid(st_transform(geom, 4326))) as geom,

    -- One for the building height + ground height
    array_agg(bldg_heigh + ground_ele) as height,

    -- One for the building IDs
    array_agg(gid) as id
from
    a

10.12

with a as (
    select
        geom,
        bldg_heigh,
        ground_ele,
        gid
    from
        denver_buildings
    order by
        random()
    limit
        2
), bldgs as (
    select
        array_agg(st_centroid(st_transform(geom, 4326))) as geom,
        array_agg(bldg_heigh + ground_ele) as height,
        array_agg(gid) as id
    from
        a
),
line as (
    select

        -- Here we create a line between the two centroids
        -- using both the geometries in individual subqueries
        st_makeline(
            (
                select
                    geom [1]
                from
                    bldgs
            ),
            (
                select
                    geom [2]
                from
                    bldgs
            )
        )
)
select
    *
from
    line

10.13

with a as (
    select
        geom,
        bldg_heigh,
        ground_ele,
        gid
    from
        denver_buildings
    order by
        random()
    limit
        2
), bldgs as (
    select
        array_agg(st_centroid(st_transform(geom, 4326))) as geom,
        array_agg(bldg_heigh + ground_ele) as height,
        array_agg(gid) as id
    from
        a
),
line as (

    -- Here we use a simple select statement rather tha subqueries 
    -- like the previous step to grab the height column too
    select
        st_makeline(geom [1], geom [2]) as geom,
        height
    from
        bldgs
)
select

    -- This will return all the buildings higher than the line
    b.gid,
    b.bldg_heigh + b.ground_ele as height,
    st_transform(b.geom, 4326),
    line.height
from
    denver_buildings b
    join line on st_intersects(line.geom, st_transform(b.geom, 4326))
where

    -- This finds any building taller than either of our two buildings
    b.bldg_heigh + b.ground_ele < line.height [1]
    or b.bldg_heigh + b.ground_ele < line.height [2]

10.4 3D line of sight

10.14

alter table
    denver_buildings
add
    column geom_z geometry

10.15

update
    denver_buildings
set
    geom_z = st_force3d(

        -- First value is the building geometry in EPSG 4326
        st_transform(geom, 4326),

        -- This is our Z or height value, the building height 
        -- plus the ground elevation
        bldg_heigh + ground_ele
    )

10.16

-- Find our first building
with a as (
    select
        geom_z,
        gid
    from
        denver_buildings
    limit
        1 offset 100
),

-- Find a building with its ID and GeometryZ
-- within 2 kilometers
b as (
    select
        geom_z,
        gid
    from
        denver_buildings
    where
        st_dwithin(
            st_transform(geom, 3857),
            (
                select
                    st_transform(geom, 3857)
                from
                    a
            ),
            2000
        )
    limit
        1
), 

-- Use UNION to create a single table with matching columns
c as (
    select
        *
    from
        a
    union
    select
        *
    from
        b
),

-- Store the geometries and and IDs in arrays
bldgs as (
    select
        array_agg(st_centroid(geom_z)) as geom,
        array_agg(gid) as id
    from
        c
),

-- This query finds all the buildings within 3 kilometers of each building
denver as (
    select
        st_transform(geom, 3857) as geom,
        geom_z,
        gid
    from
        denver_buildings
    where
        st_dwithin(

            -- We union the two points and turn them back into 2D
            -- Geometries so we can run the query to find all 
            -- buildings with 3 kilometers one time
            st_union(
                (
                    select
                        st_force2d(st_transform(geom [1], 3857))
                    from
                        bldgs
                ),
                (
                    select
                        st_force2d(st_transform(geom [2], 3857))
                    from
                        bldgs
                )
            ),
            st_transform(geom, 3857),
            3000
        )
)
select
    d.gid,
    st_transform(d.geom, 4326) as geom
from
    denver d

    -- Now we can use our ST_3DIntersects funciton to find 
    -- all the buildings crossing our path
    join bldgs on st_3dintersects(

        -- The path can be built using the same ST_MakeLine
        -- function we have been using
        st_makeline(bldgs.geom [1], bldgs.geom [2]),
        d.geom_z
    )

10.5 Calculate polygons that share a border

10.17

create table nyc_2021_census_block_groups_morans_i as
select
    *
from
    nys_2021_census_block_groups
where
    left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
    and population > 0

10.18

with a as (
    select
        *
    from
        nyc_2021_census_block_groups
    where
        geoid = '360470201002'
)
select
    bgs.geoid,

    -- Finds the number of points of the 
    -- portion of the border that intersects
    st_npoints(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        )
    ) as intersected_points,

    -- Finds the length of the 
    -- portion of the border that intersects 
    st_length(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        ) :: geography
    ) as length,
	geom
from
    nyc_2021_census_block_groups bgs
where
    st_intersects(
        (
            select
                geom
            from
                a
        ),
        bgs.geom
    )
    and bgs.geoid != (
        select
            geoid
        from
            a
    )

10.19

with a as (
    select
        *
    from
        nyc_2021_census_block_groups
    where
        geoid = '360470201002'
)
select
    bgs.*,
    st_npoints(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        )
    ) as intersected_points,
    st_length(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        ) :: geography
    ) as length,
    geom
from
    nyc_2021_census_block_groups bgs
where

    -- Only select polygons that have a border overlap 
    -- of 2 points or more
    st_npoints(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        )
    ) > 1
    and bgs.geoid != (
        select
            geoid
        from
            a
    )

10.20

with a as (
    select
        *
    from
        nyc_2021_census_block_groups
    where
        geoid = '360470201002'
)
select
    bgs.*,
    st_npoints(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        )
    ) as intersected_points,
    st_length(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        ) :: geography
    ) as length,
    geom
from
    nyc_2021_census_block_groups bgs
where

    -- Only select polygons that have a border overlap 
    -- of 100 meters or more
    st_length(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        ) :: geography
    ) > 100
    and bgs.geoid != (
        select
            geoid
        from
            a
    )

10.21

with a as (
    select
        *
    from
        nyc_2021_census_block_groups
    where
        geoid = '360470201002'
)
select
    bgs.geoid,
    geom,
    st_npoints(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        )
    ) as intersected_points,
    st_length(
        st_intersection(
            bgs.geom,
            (
                select
                    geom
                from
                    a
            )
        ) :: geography
    ) as length,
    (
        -- Finding the length of the shared border
        st_length(
            st_intersection(
                bgs.geom,
                (
                    select
                        geom
                    from
                        a
                )
            ) :: geography

        -- Dividing that by the perimiter of the source polygon
        ) / st_perimeter(
            (
                select
                    geom :: geography
                from
                    a
            )
        )
    ) as percent_of_source
from
    nyc_2021_census_block_groups bgs
where
    -- Finding touching polygons that share more than 25% of
    -- the source polygon's border
    (
        st_length(
            st_intersection(
                bgs.geom,
                (
                    select
                        geom
                    from
                        a
                )
            ) :: geography
        ) / st_perimeter(
            (
                select
                    geom :: geography
                from
                    a
            )
        )
    ) >.25
    and bgs.geoid != (
        select
            geoid
        from
            a
    )

10.6 Finding the most isolated feature

10.23

alter table
    nyc_building_footprints
add
    column h3 text

10.24

update
    nyc_building_footprints
set
    h3 = h3_lat_lng_to_cell(
        st_centroid(
            st_transform(geom, 4326)
        ), 10)

10.25

select
    bin,
    st_transform(geom, 4326) as geom
from
    nyc_building_footprints b
where

    -- Returns all the H3 cells that meet the condition in the subquery
    h3 in (
        select
            h3
        from
            nyc_building_footprints

        -- First we group the H3 cells to use the aggregate function
        group by
            h3

        -- This finds all the H3 cells that have an aggregate
        -- count greater than 1
        having
            count(*) = 1
    )

10.26

select
    bin,
    closest.distance,
    st_transform(geom, 4326) as geom
from
    nyc_building_footprints b
    cross join lateral (
        select

            -- Finding the distance to the nearest building in meters
            st_distance(
                st_transform(geom, 3857),
                st_transform(b.geom, 3857)
            ) as distance
        from
            nyc_building_footprints

        -- This removes the ID of the building we want to analyze    
        where
            bin != b.bin
        order by
            geom <-> b.geom
        limit
            1
    ) closest
where
    h3 in (
        select
            h3
        from
            nyc_building_footprints
        group by
            h3
        having
            count(*) = 1
    )
order by
    closest.distance desc

10.27

select
    *
from
    nyc_building_footprints
where
    bin = '2127308'

10.7 Kernel density estimation (KDE)

10.35

create
or replace function st_kdensity(ids bigint [], geoms geometry []) 
returns table(id bigint, geom geometry, kdensity integer) as $$ declare mc geometry;

c integer;

k numeric;

begin mc := st_centroid(st_collect(geoms));

c := array_length(ids, 1);

k := sqrt(1 / ln(2));

return query with dist as (
    select
        t.gid,
        t.g,
        st_distance(t.g, mc) as distance
    from
        unnest(ids, geoms) as t(gid, g)
),
md as (
    select
        percentile_cont(0.5) within group (
            order by
                distance
        ) as median
    from
        dist
),
sd as (
    select
        sqrt(
            sum((st_x(g) - st_x(mc)) ^ 2) / c + sum((st_y(g) - st_y(mc)) ^ 2) / c
        ) as standard_distance
    from
        dist
),
sr as (
    select
        0.9 * least(sd.standard_distance, k * md.median) * c ^(-0.2) as search_radius
    from
        sd,
        md
)
select
    gid as id,
    g as geom,
    kd :: int as kdensity
from
    sr,
    dist as a,
    lateral(
        select
            count(*) as kd
        from
            dist _b
        where
            st_dwithin(a.g, _b.g, sr.search_radius)
    ) b;

end;

$$ language plpgsql immutable parallel safe;

10.36

create table east_village_kde as WITH a AS(
    SELECT
        array_agg(ogc_fid) as ids,
        array_agg(geom) as geoms
    FROM
        nyc_2015_tree_census
    where
        st_intersects(
            geom,
            (
                select
                    geom
                from
                    nyc_neighborhoods
                where
                    neighborhood = 'East Village'
            )
        )
)
SELECT
    b.*
FROM
    a,
    ST_KDensity(a.ids, a.geoms) b

10.8 Isovist

10.37

with buildings_0 as(
    select
        t.geom
    from

        -- Find all the buildingswithin 2 kilometers of Times Square where
        -- geometries are stored in an array
        unnest(
            (
                select
                    array_agg(geom)
                from
                    nyc_building_footprints
                where
                    st_dwithin(
                        geom :: geography,
                        st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
                        2000
                    )
            )
        ) as t(geom)
),
buildings_crop as(
    
    -- Now only the buildings within 630 meters of Times Square
    select
        geom
    from
        buildings_0
    where
        st_dwithin(
            st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
            geom :: geography,
            630
        )
),
buildings as(

    -- Union these two tables together
    select
        geom
    from
        buildings_crop
    union
    all
    select
        st_buffer(
            st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
            630
        ) :: geometry as geom
)
select
    *
from
    buildings

10.41

create
or replace function isovist(

    -- Sets up the arguments for our function
    in center geometry,
    in polygons geometry [],
    in radius numeric default 150,
    in rays integer default 36,
    in heading integer default -999,
    in fov integer default 360
) returns geometry as $$ declare arc numeric;

-- Creates static variables for angle and geometry
angle_0 numeric;

geomout geometry;

-- Calculates the arc distance using the field of view (fov)
-- degrees and the number of rays
begin arc := fov :: numeric / rays :: numeric;

if fov = 360 then angle_0 := 0;

else angle_0 := heading - 0.5 * fov;

end if;

with buildings_0 as(
    select
        t.geom
    from
        unnest(polygons) as t(geom)
),
buildings_crop as(
    select
        geom
    from
        buildings_0
    where
        st_dwithin(center :: geography, geom :: geography, radius)
),
buildings as(
    select
        geom
    from
        buildings_crop
    union
    all
    select
        st_buffer(center :: geography, radius) :: geometry as geom
),
rays as(
    select
        t.n as id,
        st_setsrid(
            st_makeline(
                center,
                st_project(
                    center :: geography,
                    radius + 1,
                    radians(angle_0 + t.n :: numeric * arc)
                ) :: geometry
            ),
            4326
        ) as geom
    from
        generate_series(0, rays) as t(n)
),
intersections as(
    select
        r.id,
        (
            st_dump(st_intersection(st_boundary(b.geom), r.geom))
        ).geom as point
    from
        rays r
        left join buildings b on st_intersects(b.geom, r.geom)
),
intersections_distances as(
    select
        id,
        point as geom,
        row_number() over(
            partition by id
            order by
                center <-> point
        ) as ranking
    from
        intersections
),
intersection_closest as(
    select
        -1 as id,
        case
            when fov = 360 then null :: geometry
            else center
        end as geom
    union
    all (
        select
            id,
            geom
        from
            intersections_distances
        where
            ranking = 1
        order by
            id
    )
    union
    all
    select
        999999 as id,
        case
            when fov = 360 then null :: geometry
            else center
        end as geom
),
isovist_0 as(
    select
        st_makepolygon(st_makeline(geom)) as geom
    from
        intersection_closest
),
isovist_buildings as(
    select
        st_collectionextract(st_union(b.geom), 3) as geom
    from
        isovist_0 i,
        buildings_crop b
    where
        st_intersects(b.geom, i.geom)
)
select
    coalesce(st_difference(i.geom, b.geom), i.geom) into geomout
from
    isovist_0 i,
    isovist_buildings b;

return geomout;

end;

$$ language plpgsql immutable;

10.42

select
    *
from
    isovist(

        -- This is our center point in Times Square
        st_setsrid(st_makepoint(-73.985136, 40.758786), 4326),
        (
            -- Selects all the geometries 
            select
                array_agg(geom)
            from
                nyc_building_footprints
            where
                st_dwithin(
                    geom :: geography,
                    st_setsrid(st_makepoint(-73.985136, 40.758786), 4326) :: geography,
                    2000
                )
        ),
        300,
        36
    )

11 - Suitability analysis

11.1 Market expansion potential

11.1

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis port=25432" nyc_pharmacies.geojson \
-nln nyc_pharmacies -lco GEOMETRY_NAME=geom

11.2

11.8

-- Selects necessary data for Duane Reade locations
with a as (
    select
        id,
        amenity,
        brand,
        name,
        geom
    from
        nyc_pharmacies
    where
        name ilike '%duane%'
),

-- Spatially join the Duane Reade stores to neighborhoods and adds neighborhood names
b as (
    select
        a.*,
        b.neighborhood
    from
        a
        join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
),

-- Creates a buffer for each store
c as (
    select
        id,
        st_buffer(st_transform(geom, 3857), 800) as buffer,
        neighborhood
    from
        b
),

-- Union the buffers together by neighborhood
d as (
    select
        st_transform(st_union(buffer), 4326) as geom,
        neighborhood
    from
        c
    group by
        neighborhood
),

-- Caluclates the proportional population for each group of buffers
-- and also the area of the groupped buffers
e as (
    select
        d.*,
        sum(
            bgs.population * (
                st_area(st_intersection(d.geom, bgs.geom)) / st_area(bgs.geom)
            )
        ) as pop,
        st_area(st_transform(d.geom, 3857)) as area
    from
        d
        join nys_2021_census_block_groups bgs on st_intersects(bgs.geom, d.geom)
    group by
        d.geom,
        d.neighborhood
)

-- Calculates the population density
select
    neighborhood,
    pop / area as potential
from
    e
order by
    pop / area desc

11 - Suitability analysis

11.1 Market expansion potential

11.1

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis port=25432" nyc_pharmacies.geojson \
-nln nyc_pharmacies -lco GEOMETRY_NAME=geom

11.2

11.8

-- Selects necessary data for Duane Reade locations
with a as (
    select
        id,
        amenity,
        brand,
        name,
        geom
    from
        nyc_pharmacies
    where
        name ilike '%duane%'
),

-- Spatially join the Duane Reade stores to neighborhoods and adds neighborhood names
b as (
    select
        a.*,
        b.neighborhood
    from
        a
        join nyc_neighborhoods b on st_intersects(a.geom, b.geom)
),

-- Creates a buffer for each store
c as (
    select
        id,
        st_buffer(st_transform(geom, 3857), 800) as buffer,
        neighborhood
    from
        b
),

-- Union the buffers together by neighborhood
d as (
    select
        st_transform(st_union(buffer), 4326) as geom,
        neighborhood
    from
        c
    group by
        neighborhood
),

-- Caluclates the proportional population for each group of buffers
-- and also the area of the groupped buffers
e as (
    select
        d.*,
        sum(
            bgs.population * (
                st_area(st_intersection(d.geom, bgs.geom)) / st_area(bgs.geom)
            )
        ) as pop,
        st_area(st_transform(d.geom, 3857)) as area
    from
        d
        join nys_2021_census_block_groups bgs on st_intersects(bgs.geom, d.geom)
    group by
        d.geom,
        d.neighborhood
)

-- Calculates the population density
select
    neighborhood,
    pop / area as potential
from
    e
order by
    pop / area desc

11.2 Similarity search or twin areas

11.9

create table tree_similarity as 

-- Finds the count of all trees in each neighborhood
with a as (
    select
        count(t.*) as total_trees,
        n.neighborhood
    from
        nyc_2015_tree_census t
        join nyc_neighborhoods n on st_intersects(t.geom, n.geom)
    group by
        n.neighborhood
)

-- Finds the count of each type of tree in each neighborhood
select
    n.neighborhood,
    (
        count(t.*) filter (
            where
                t.spc_common ilike '%pine%'
        ) :: numeric / a.total_trees
    ) as pine,
    (
        count(t.*) filter (
            where
                t.spc_common ilike '%maple%'
        ) :: numeric / a.total_trees
    ) as maple,
    (
        count(t.*) filter (
            where
                t.spc_common ilike '%linden%'
        ) :: numeric / a.total_trees
    ) as linden,
    (
        count(t.*) filter (
            where
                t.spc_common ilike '%honeylocust%'
        ) :: numeric / a.total_trees
    ) as honeylocust,
    (
        count(t.*) filter (
            where
                t.spc_common ilike '%oak%'
        ) :: numeric / a.total_trees
    ) as oak

-- Joins the above data with data from the original CTE
from
    nyc_2015_tree_census t
    join nyc_neighborhoods n on st_intersects(t.geom, n.geom)
    join a using (neighborhood)
group by
    n.neighborhood,
    a.total_trees

11.10

with a as (
    select
        *
    from
        tree_similarity
    where
        neighborhood = 'Harlem'
)
select
    t.neighborhood,

    -- Here we subtract the values from our target neighborhood in table "t"
    -- which is Harlem, from the average values across the city
    t.pine - a.pine as pine_dif,
    t.maple - a.maple as maple_dif,
    t.oak - a.oak as oak_dif,
    t.linden - a.linden as linden_dif,
    t.honeylocust - a.honeylocust as honeylocust_dif
from
    tree_similarity t,
    a
where
    t.neighborhood != 'Harlem'

11.11

create table harlem_tree_similarity as with a as (
    select
        *
    from
        tree_similarity
    where
        neighborhood = 'Harlem'
),

-- A: Find the difference between Harlem and all other neighborhoods 
b as (
    select
        t.neighborhood,
        t.pine - a.pine as pine_dif,
        t.maple - a.maple as maple_dif,
        t.oak - a.oak as oak_dif,
        t.linden - a.linden as linden_dif,
        t.honeylocust - a.honeylocust as honeylocust_dif
    from
        tree_similarity t,
        a
    where
        t.neighborhood != 'Harlem'
),

-- B: Find the min and max values in each column and store it as an array 
c as (
    select
        array [min(pine_dif), max(pine_dif)] as pine,
        array [min(maple_dif), max(maple_dif)] as maple,
        array [min(oak_dif), max(oak_dif)] as oak,
        array [min(linden_dif), max(linden_dif)] as linden,
        array [min(honeylocust_dif), max(honeylocust_dif)] as honeylocust
    from
        b
),

-- C: Find the absolute value of each difference value, normalize the data, and subtract that value from 1 
d as (
    select
        b.neighborhood,
        1 - (abs(b.pine_dif) - c.pine [1]) / (c.pine [2] - c.pine [1]) as pine_norm,
        1 - (b.maple_dif - c.maple [1]) / (c.maple [2] - c.maple [1]) as maple_norm,
        1 - (b.oak_dif - c.oak [1]) / (c.oak [2] - c.oak [1]) as oak_norm,
        1 - (b.linden_dif - c.linden [1]) / (c.linden [2] - c.linden [1]) as linden_norm,
        1 - (b.honeylocust_dif - c.honeylocust [1]) / (c.honeylocust [2] - c.honeylocust [1]) as honeylocust_norm
    from
        b,
        c
) 

-- D: Add up and divide the values 
select
    neighborhood,
    (
        pine_norm + maple_norm + oak_norm + linden_norm + honeylocust_norm
    ) / 5 as final
from
    d
order by
    2 desc

11.12

create table harlem_tree_similarity_geo as
select
    s.*,
    h.geom
from
    harlem_tree_similarity s
    join nyc_hoods h using (neighborhood)

11.3 Suitability or composite score

11.13

alter table
    nyc_2015_tree_census
add
    column h3 text

11.14

update
    nyc_2015_tree_census
set
    h3 = h3_lat_lng_to_cell(geom, 10)

11.15

create table nyc_bgs_h3s as
select
    geoid,
    h3_polygon_to_cells(geom, 10)
from
    nys_2021_census_block_groups
where
    left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')

11.16

select
    geoid,
    count(*)
from
    nyc_bgs_h3s
group by
    geoid
order by
    count(*) desc
limit
    5

11.26

-- Get the count of cells in each block group
with a as (
    select
        geoid,
        count(*) as count
    from
        nyc_bgs_h3s
    group by
        geoid
    order by
        count(*) desc
),

-- Join the total population to each H3 cell
b as (
    select
        h3.geoid,
        h3.h3_polygon_to_cells as h3,
        c.population as pop
    from
        nyc_bgs_h3s h3
        join nys_2021_census_block_groups c on h3.geoid = c.geoid
),

-- Find the proportional population by dividing the total
-- population by the H3 cell count per block group
c as (
    select
        b.pop :: numeric / a.count :: numeric as pop,
        b.h3,
        b.geoid
    from
        b
        join a using (geoid)
),

-- Find the scaled values for each target data point
d as (
    select
        c.*,
        abs(70000 - bgs.income) as income,
        abs(35 - bgs.age) as age
    from
        c
        join nys_2021_census_block_groups bgs using (geoid)
),

-- Get the tree count in each cell
e as (
    select
        d.h3,
        count(t.ogc_fid) as trees
    from
        d
        join nyc_2015_tree_census t on d.h3 :: text = t.h3
    group by
        d.h3
),

-- Add the min and max values for each data point to an array
f as (
    select
        array [min(trees), max(trees)] as trees_s,
        array [min(pop), max(pop)] as pop_s,
        array [min(income), max(income)] as income_s,
        array [min(age), max(age)] as age_s
    from
        e
        join d on d.h3 = e.h3
),

-- Join the two previous CTEs together
g as (
    select
        e.trees,
        d.age,
        d.income,
        d.pop,
        d.h3
    from
        d
        join e on d.h3 = e.h3
),

-- Calculate the 0 to 1 index
h as (
    select
        g.h3,
        (
            (g.trees :: numeric - f.trees_s [1]) /(f.trees_s [2] - f.trees_s [1])
        ) as trees_i,
        1 - ((g.age - f.age_s [1]) /(f.age_s [2] - f.age_s [1])) as age_i,
        1 - (
            (g.income - f.income_s [1]) /(f.income_s [2] - f.income_s [1])
        ) as income_i,
        ((g.pop - f.pop_s [1]) /(f.pop_s [2] - f.pop_s [1])) as pop_i
    from
        g,
        f
)

-- Add up to find the final index value
select
    *,
    trees_i + age_i + income_i + pop_i
from
    h

11.4 Mergers and acquisitions

11.27

create table pharmacy_stats as
select
    p.id,
    p.amenity,
    p.brand,
    p.name,
    p.geom,

    -- Add a buffer of 800 meters for later use
    st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326) as buffer,

    -- Get the total proportional population for each buffer 
    sum(
        bgs.population * (
            st_area(
                st_intersection(
                    st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326),
                    bgs.geom
                )
            ) / st_area(bgs.geom)
        )
    ) as pop
from
    nyc_pharmacies p
    join nyc_2021_census_block_groups bgs on st_intersects(p.geom, st_transform(bgs.geom, 4326))

-- Get just the stores for CVS and Duane Reade
where
    p.name ilike '%duane%'
    or p.name ilike '%cvs%'
group by
    p.id,
    p.amenity,
    p.brand,
    p.name,
    p.geom

11.28

select
    *
from
    nyc_pharmacies
where
    name ilike '%duane%'
    or name ilike '%cvs%'

11.29

-- CTE with all Duane Reade locations 
with dr as (
    select
        id,
        amenity,
        brand,
        name,
        geom,
        st_transform(geom, 3857) as geom_3857
    from
        nyc_pharmacies
    where
        name ilike '%duane%'
),

-- CTE with all CVS locations 
cvs as (
    select
        id,
        amenity,
        brand,
        name,
        geom,
        st_transform(geom, 3857) as geom_3857
    from
        nyc_pharmacies
    where
        name ilike '%cvs%'
),

-- Find all Duane Reade locations within 200 meters of a CVS
remove_nearest as (
    select
        dr.*
    from
        dr,
        cvs
    where
        st_dwithin(dr.geom_3857, cvs.geom_3857, 200)
)
select
    count(*)
from
    remove_nearest

11.30

with dr as (
    select
        *
    from
        pharmacy_stats
    where
        name ilike '%duane%'
)
select
    dr.*,

    -- Find the area of overlap between the two buffer groups
    st_area(
        st_intersection(pharmacy_stats.buffer, dr.buffer)
    ) / st_area(dr.buffer)
from
    dr
    join pharmacy_stats on st_intersects(dr.buffer, pharmacy_stats.buffer)
where

    -- Find the number of pharmacies that have an overlap greater than 75%
    st_area(
        st_intersection(pharmacy_stats.buffer, dr.buffer)
    ) / st_area(dr.buffer) >.75
    and pharmacy_stats.name ilike '%cvs%'

11.31

with a as (
    select

        -- Union all buffers to find the "before" scenario total area
        st_union(
            st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326)
        ) as buffer,
        st_area(
            st_union(st_buffer(st_transform(p.geom, 3857), 800))
        ) as area
    from
        nyc_pharmacies p
        join nyc_2021_census_block_groups bgs on st_intersects(
            st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326),
            st_transform(bgs.geom, 4326)
        )
    where
        p.name ilike '%duane%'
        or p.name ilike '%cvs%'
)
select
    a.*,

    -- Find the total populatino of all combined buffers
    sum(
        bgs.population * (
            st_area(st_intersection(a.buffer, bgs.geom)) / st_area(bgs.geom)
        )
    ) as pop
from
    a
    join nyc_2021_census_block_groups bgs on st_intersects(a.buffer, st_transform(bgs.geom, 4326))
group by
    1,
    2

11.32

create table duane_reade_ids as with overlap as (
    with dr as (
        select
            *
        from
            pharmacy_stats
        where
            name ilike '%duane%'
    )

    -- Find all the Duane Reade locations that have an overlap of over 75%
    select
        dr.id
    from
        dr
        join pharmacy_stats on st_intersects(dr.buffer, pharmacy_stats.buffer)
    where
        st_area(
            st_intersection(pharmacy_stats.buffer, dr.buffer)
        ) / st_area(dr.buffer) >.75
        and pharmacy_stats.name ilike '%cvs%'
),

-- Select all Duane Reade stores
dr as (
    select
        id,
        amenity,
        brand,
        name,
        geom,
        st_transform(geom, 3857) as geom_3857
    from
        nyc_pharmacies
    where
        name ilike '%duane%'
),

-- Select all CVS stores
cvs as (
    select
        id,
        amenity,
        brand,
        name,
        geom,
        st_transform(geom, 3857) as geom_3857
    from
        nyc_pharmacies
    where
        name ilike '%cvs%'
),

-- Remove all the Duane Reade locations within 200 meters of a CVS
remove_nearest as (
    select
        dr.id
    from
        dr,
        cvs
    where
        st_dwithin(dr.geom_3857, cvs.geom_3857, 200)
)
select
    id
from
    remove_nearest
union
select
    id
from
    overlap

11.33

-- We run the same query but we remove IDs not in the table we just created
with p as (
    select
        *
    from
        nyc_pharmacies
    where
        id not in (
            select
                id
            from
                duane_reade_ids
        )
),
a as (
    select
        st_union(
            st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326)
        ) as buffer,
        st_area(
            st_union(st_buffer(st_transform(p.geom, 3857), 800))
        ) as area
    from
        nyc_pharmacies p
        join nyc_2021_census_block_groups bgs on st_intersects(
            st_transform(st_buffer(st_transform(p.geom, 3857), 800), 4326),
            st_transform(bgs.geom, 4326)
        )
    where
        p.name ilike '%duane%'
        or p.name ilike '%cvs%'
)
select
    a.*,
    sum(
        bgs.population * (
            st_area(st_intersection(a.buffer, bgs.geom)) / st_area(bgs.geom)
        )
    ) as pop
from
    a
    join nyc_2021_census_block_groups bgs on st_intersects(a.buffer, st_transform(bgs.geom, 4326))
group by
    1,
    2

12 - Working with raster data in PostGIS

12.1 Raster Ingest

12.1

docker run --name mini-postgis -p 35432:5432 --network="host" 
-v /Users/mattforrest/Documents/spatial-sql-book/raster:/mnt/mydata 
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master

12.2

raster2pgsql -s 4269 -I -C -M mnt/mydata/denver-elevation.tif -t 128x128 -F denver_elevation | psql -d gis -h 127.0.0.1 -p 25432 -U docker -W

12.3

select
    *
from
    denver_elevation_full

12.4

with c as (
    select
        (st_contour(rast, 1, 200.00)).*
    from
        denver_elevation
    where
        filename = 'denver-elevation.tif'
)
select
    st_transform(geom, 4326),
    id,
    value
from
    c

12 - Working with raster data in PostGIS

12.1 Raster Ingest

12.1

docker run --name mini-postgis -p 35432:5432 --network="host" 
-v /Users/mattforrest/Documents/spatial-sql-book/raster:/mnt/mydata 
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master

12.2

raster2pgsql -s 4269 -I -C -M mnt/mydata/denver-elevation.tif -t 128x128 -F denver_elevation | psql -d gis -h 127.0.0.1 -p 25432 -U docker -W

12.3

select
    *
from
    denver_elevation_full

12.4

with c as (
    select
        (st_contour(rast, 1, 200.00)).*
    from
        denver_elevation
    where
        filename = 'denver-elevation.tif'
)
select
    st_transform(geom, 4326),
    id,
    value
from
    c

12.2 Interpolation

12.5

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=gis port=25432" nys_mesonet_202307.csv \ 
-dialect sqlite -sql 'select makepoint(cast("longitude [degrees_east]" as numeric), cast("latitude [degrees_north]" as numeric), 4326)  AS geom, * from nys_mesonet_202307' \ 
-nln nys_mesonet_202307 -lco GEOMETRY_NAME=geom

12.9

create table nys_mesonet_raster as with inputs as (
    select
        -- Sets the pixel size to 0.01 decimal degrees since
        -- we are using EPSG 4326
        0.01 :: float8 as pixelsize,

        -- Sets the smoothing algorithim from "gdal_grid"
        'invdist:power:5.5:smoothing:2.0' as algorithm,
        st_collect(

            -- Creates a geometry collection of our data and forces
            -- a Z coodrinate of the max temparature 
            st_force3dz(
                geom,
                case
                    when "apparent_temperature_max [degc]" = '' then null
                    else "apparent_temperature_max [degc]" :: numeric
                end
            )
        ) as geom,

        -- Expands the grid to add room aroung the edges
        st_expand(st_collect(geom), 0.5) as ext
    from
        nys_mesonet_202307
    where
        time_end = '2023-07-23 12:00:00 EDT'
),
sizes AS (
  SELECT
    ceil((ST_XMax(ext) - ST_XMin(ext)) / pixelsize) :: integer AS width,
    ceil((ST_YMax(ext) - ST_YMin(ext)) / pixelsize) :: integer AS height,
    ST_XMin(ext) AS upperleftx,
    ST_YMax(ext) AS upperlefty
  FROM
    inputs
)
SELECT

    -- Sets 1 as the raster ID since we only have one raster
    1 AS rid,
    ST_InterpolateRaster(

        -- The geometry collection that will be used to interpolate to the raster
        geom,

        -- The algorithim we defined
        algorithm,
        ST_SetSRID(

            -- This creates the band
            ST_AddBand(

                -- Creates the empty raster with our arguments from the previous CTEs
                ST_MakeEmptyRaster(width, height, upperleftx, upperlefty, pixelsize),

                -- Sets the default values as a 32 bit float
                '32BF'
            ),

            -- The SRID the raster will use
            ST_SRID(geom)
        )
    ) AS rast 
FROM
    sizes,
    inputs

12.3 Raster to H3

12.10

create table denver_ele_h3 as
select
    h3_lat_lng_to_cell(a.geom, 11) as h3,
    avg(val) as avg_ele
from
    (
        select
            r.*
        from
            denver_elevation,
            lateral ST_PixelAsCentroids(rast, 1) as r
    ) a
group by
    1

12.11

select
    r.*
from
    denver_elevation,
    lateral ST_PixelAsCentroids(rast, 1) as r

13 - Routing and networks with pgRouting

13.1 Prepare data to use in pgRouting

13.1

docker run --name mini-postgis -p 35432:5432 --network="host" 
-v /Users/mattforrest/Desktop/Desktop/spatial-sql-book/raster:/mnt/mydata 
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master

13.2

docker container exec -it mini-postgis bash

13.3

apt update
apt install osm2pgrouting

13.4

apt install osmctools

13.5

osmfilter /mnt/mydata/planet_-74.459,40.488_-73.385,41.055.osm --keep="highway=" \
-o=/mnt/mydata/good_ways.osm

13 - Routing and networks with pgRouting

13.1 Prepare data to use in pgRouting

13.1

docker run --name mini-postgis -p 35432:5432 --network="host" 
-v /Users/mattforrest/Desktop/Desktop/spatial-sql-book/raster:/mnt/mydata 
-e POSTGRES_USER=admin -e POSTGRES_PASSWORD=password -d postgis/postgis:15-master

13.2

docker container exec -it mini-postgis bash

13.3

apt update
apt install osm2pgrouting

13.4

apt install osmctools

13.5

osmfilter /mnt/mydata/planet_-74.459,40.488_-73.385,41.055.osm --keep="highway=" \
-o=/mnt/mydata/good_ways.osm

13.2 Create a simple route in pgRouting

13.6

osm2pgrouting \
    -f "/mnt/mydata/good_ways.osm" \
    -d routing \
    -p 25432 \
    -U docker \
    -W docker \
    --clean

13.11

-- Find the source ID closest to the starting point
with start as (
    select
        source
    from
        ways
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('point (-74.244391 40.498995)'),
            4326
        )
    limit
        1
), 

-- Find the source ID closest to the end point
destination as (
    select
        source
    from
        ways
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('point (-73.902630 40.912329)'),
            4326
        )
    limit
        1
)

-- Run our pgRouting query
select
    st_union(the_geom) as route
from
    pgr_dijkstra(
        'select gid as id, source, target, cost,
        reverse_cost, st_length(st_transform(the_geom, 3857)) 
        as cost from ways',
        (
            select
                source
            from
                start
        ),
        (
            select
                source
            from
                destination
        ),
        true
    ) as di
    join ways as pt on di.edge = pt.gid;

13.12

select
    tag_id,
    tag_key,
    tag_value
from
    configuration
order by
    tag_id;

13.13

create table car_config as
select
    *
from
    configuration

13.14

alter table
    car_config
add
    column penalty float;

13.15

update car_config set penalty=1

13.16

update
    car_config
set
    penalty = -1.0
where
    tag_value in ('steps', 'footway', 'pedestrian');

update
    car_config
set
    penalty = 5
where
    tag_value in ('unclassified');

13.17

update
    car_config
set
    penalty = 0.5
where
    tag_value in ('tertiary');

update
    car_config
set
    penalty = 0.3
where
    tag_value in (
        'primary',
        'primary_link',
        'trunk',
        'trunk_link',
        'motorway',
        'motorway_junction',
        'motorway_link',
        'secondary'
    );

13.19

with start as (
    select
        source
    from
        ways
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('point (-74.244391 40.498995)'),
            4326
        )
    limit
        1
), destination as (
    select
        source
    from
        ways
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('point (-73.902630 40.912329)'),
            4326
        )
    limit
        1
)
select
    st_union(the_geom) as route
from
    pgr_dijkstra(
        'select
            gid as id,
            source,
            target,
            cost_s * penalty as cost,
            reverse_cost_s * penalty as reverse_cost,
            st_length(st_transform(the_geom, 3857)) as length
        from
            ways
            join car_config using (tag_id)',
        (
            select
                source
            from
                start
        ),
        (
            select
                source
            from
                destination
        ),
        true
    ) as di
    join ways as pt on di.edge = pt.gid;

13.3 Building an origin-destination matrix

13.20

osmfilter mnt/mydata/planet_-74.459,40.488_-73.385,41.055.osm \
--keep="highway= route= cycleway= bicycle= segregated=" \
-o=mnt/mydata/bike_ways.osm

13.21

osm2pgrouting \
-f "mnt/mydata/bike_ways.osm" \
-d bike \
-p 25432 \
-U docker \
-W docker \
--clean

13.22

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=bike port=25432" \
nyc_pharmacies.geojson \
-nln nyc_pharmacies -lco GEOMETRY_NAME=geom 

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=bike port=25432" \
Building_Footprints.geojson \
-nln nyc_building_footprints -lco GEOMETRY_NAME=geom

13.23

alter table
    configuration
add
    column penalty float;

update
    configuration
set
    penalty = 1.0;

13.24

update
    configuration
set
    penalty = 10.0
where
    tag_value in ('steps', 'footway', 'pedestrian');

update
    configuration
set
    penalty = 0.3
where
    tag_key in ('cycleway');

update
    configuration
set
    penalty = 0.3
where
    tag_value in ('cycleway', 'bicycle');

update
    configuration
set
    penalty = 0.7
where
    tag_value in ('tertiary', 'residential');

update
    configuration
set
    penalty = 1
where
    tag_value in ('secondary');

update
    configuration
set
    penalty = 1.2
where
    tag_value in ('primary', 'primary_link');

update
    configuration
set
    penalty = 2
where
    tag_value in (
        'trunk',
        'trunk_link',
        'motorway',
        'motorway_junction',
        'motorway_link'
    );

13.26

with pharm as (
    select
        name,
        st_centroid(geom) as geom,
        id
    from
        nyc_pharmacies
    where
        st_dwithin(
            st_centroid(geom) :: geography,
            st_setsrid(st_makepoint(-73.9700743, 40.6738928), 4326) :: geography,
            3000
        )
    order by
        random()
    limit
        3
), bldgs as (
    select
        st_centroid(geom) as bldg_geom,
        bin
    from
        nyc_building_footprints
    where
        st_dwithin(
            st_centroid(geom) :: geography,
            st_setsrid(st_makepoint(-73.9700743, 40.6738928), 4326) :: geography,
            3000
        )
    order by
        random()
    limit
        10
), 

c as (

    -- First we select all the columns from the pharm, bldgs, and wid CTEs and subqueries
    select
        pharm.*,
        bldgs.*,
        wid.*
    from

        -- We perform a cross join to find all possible matches between
        -- the 5 pharmacies and the 20 buildings
        pharm,
        bldgs
        cross join lateral (

            -- For each row find the start and end way IDs
            with start as (
                select
                    source
                from
                    ways
                order by
                    the_geom <-> pharm.geom
                limit
                    1
            ), destination as (
                select
                    source
                from
                    ways
                order by
                    ways.the_geom <-> st_centroid(bldgs.bldg_geom)
                limit
                    1
            )
            select
                start.source as start_id,
                destination.source as end_id
            from
                start,
                destination
        ) wid
)
select
    -- For each combination we get the sum of the cost in distance, seconds, and route length
    -- and we repeat this for every row, or possible combinatino
    sum(di.cost) as cost,
    sum(length) as length,
    sum(pt.cost_s) as seconds,
    st_union(st_transform(the_geom, 4326)) as route
from
    pgr_dijkstra(
        'select 
        gid as id, 
        source, 
        target, 
        cost_s * penalty as cost, 
        reverse_cost_s * penalty as reverse_cost, 
        st_length(st_transform(the_geom, 3857)) as length 
        from ways 
        join configuration using (tag_id)',
        array(
            select
                distinct start_id
            from
                c
        ),
        array(
            select
                distinct end_id
            from
                c
        ),
        true
    ) as di
    join ways as pt on di.edge = pt.gid
group by
    start_vid,
    end_vid

13.4 Traveling salesman problem

13.29

-- Find 10 random Rite-Aid locations
with a as (
    select
        *
    from
        nyc_pharmacies
    where
        name ilike '%rite%'
    limit
        10
)
select

    -- For each pharmacy we will find 10 random 
    -- buildings within 800 meters
    a.name,
    a.id as pharm_id,
    a.geom as pharm_geom,
    b.*
from
    a
    cross join lateral (
        select
            bin as building_id,
            geom
        from
            nyc_building_footprints
        where
            st_dwithin(
                st_centroid(geom) :: geography,
                st_centroid(a.geom) :: geography,
                800
            )
        order by
            random()
        limit
            10
    ) b

13.30

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=bike port=25432" \
rite_aid_odm.csv \
-nln rite_aid_odm -lco GEOMETRY_NAME=geom -oo AUTODETECT_TYPE=true

13.32

create table rite_aid_tsp as with a as (
    select
        distinct b.pharm_id as id,
        b.geom,
        s.source
    from
        rite_aid_odm b
        cross join lateral(
            SELECT
                source
            FROM
                ways
            ORDER BY
                the_geom <-> b.pharm_geom
            limit
                1
        ) s
), b as (
    select
        b.pharm_id as id,
        s.source
    from
        rite_aid_odm b
        cross join lateral(
            SELECT
                source
            FROM
                ways
            ORDER BY
                the_geom <-> b.geom
            limit
                1
        ) s
), c as (
    select
        a.id,
        a.source,

        -- Constructs an array with the way ID of the pharmacy as the first item
        array_prepend(a.source, array_agg(distinct b.source)) as destinations
    from
        a
        join b using (id)

    -- Will return one row per pharmacy ID
    group by
        a.id,
        a.source
)
select
    *
from
    c

13.33

create table rite_aid_tsp_odm as

-- Select all the values from the table created in the last step
select
    a.*,
    r.*
from
    rite_aid_tsp a
    cross join lateral (
        select
            *
        from

            -- This will create the cost matrix for each pharmacy
            pgr_dijkstracostmatrix(
                'select 
                gid as id, 
                source, 
                target, 
                cost_s * penalty as cost, 
                reverse_cost_s * penalty as reverse_cost 
                from ways 
                join configuration 
                using (tag_id)',

                -- We can use the array to calculate the distances 
                -- between all locations in the array
                (
                    select
                        destinations
                    from
                        rite_aid_tsp
                    where
                        id = a.id
                ),
                directed := false
            )
    ) r

13.37

create table solved_tsp as
select
    s.id,
    s.source,
    tsp.*,
    lead(tsp.node, 1) over (
        partition by s.source
        order by
            tsp.seq
    ) as next_node
from
    rite_aid_tsp s
    cross join lateral (
        select
            *
        from
            pgr_TSP(
                $$
                select
                    *
                from
                    rite_aid_tsp_odm
                where
                    source = $$ || s.source || $$$$
            )
    ) tsp

13.38

create table final_tsp_test as 
with a as (
    select
        s.id,
        s.source,
        s.seq,
        s.node,
        s.next_node,
        di.*,
        ways.the_geom,
        st_length(st_transform(ways.the_geom, 3857)) as length
    from
        solved_tsp s,
        
        -- We cross join this to each row 
        pgr_dijkstra(
            'select 
            gid as id, 
            source, 
            target, 
            cost_s, 
			cost_s * penalty as cost, 
			reverse_cost_s * penalty as reverse_cost 
            from ways 
            join configuration
            using (tag_id)',

            -- We create a route between the current node and the next node
            s.node,
            s.next_node,
            true
        ) as di
        join ways on di.node = ways.source
)
select

    -- Union the geometries and find the sum of the cost and length for each route
    st_union(st_transform(the_geom, 4326)) as route,
    source,
    sum(cost) as cost,
    sum(length) as length
from
    a
group by
    source

13.5 Creating travel time polygons or isochones

13.41

with start as (
    select
        source
    from
        ways
        join car_config using (tag_id)
    where
        car_config.penalty != -1
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('POINT (-73.987374 40.722349)'),
            4326
        )
    limit
        1
), b as (
    select
        *
    from
        pgr_drivingdistance(
            'select 
            gid as id, 
            source, 
            target,
            cost_s * 2.5 as cost, 
            reverse_cost_s * 2.5 as reverse_cost 
            from ways 
            join car_config 
            using (tag_id) 
            where tag_id in (110, 100, 124, 115, 112, 125, 109, 101, 
            103, 102, 106, 107, 108, 104, 105)',
            (
                select
                    source
                from
                    start
            ),
            600
        )
)

-- Union the geometries into a single geometry
select
	st_union(ways.the_geom)
from
    ways
where
	gid in (select edge from b)

13.42

with start as (
    select
        source
    from
        ways
        join car_config using (tag_id)
    where
        car_config.penalty != -1
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('POINT (-73.987374 40.722349)'),
            4326
        )
    limit
        1
), b as (
    select
        *
    from
        pgr_drivingdistance(
            'select 
            gid as id, 
            source, 
            target, 
            cost_s * 2.5 as cost, 
            reverse_cost_s * 2.5 as reverse_cost 
            from ways 
            join car_config 
            using (tag_id) 
            where tag_id in (110, 100, 124, 115, 112, 125, 
            109, 101, 103, 102, 106, 107, 108, 104, 105)',
            (
                select
                    source
                from
                    start
            ),
            600
        )
)
select
    -- Creates a concave hull around the entire routes geometry
    st_concavehull(
        st_transform(
            st_union(the_geom), 4326),
        0.1) as geom
from
    ways
where
    gid in (
        select
            distinct edge
        from
            b
    )

13.43

with start as (
    select
        source
    from
        ways
        join car_config using (tag_id)
    where
        car_config.penalty != -1
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('POINT (-73.987374 40.722349)'),
            4326
        )
    limit
        1
), b as (
    select
        *
    from
        pgr_drivingdistance(
            'select gid as id, 
            source, 
            target, 
            cost_s * 2.5 as cost, 
            reverse_cost_s * 2.5 as reverse_cost 
            from ways 
            join car_config 
            using (tag_id) 
            where tag_id in (110, 100, 124, 115, 112, 
            125, 109, 101, 103, 102, 106, 107, 108, 104, 105)',
            (
                select
                    source
                from
                    start
            ),
            600
        )
)
select

    -- Turn it all into a polygon
    st_makepolygon(

        -- Find the exterior ring which will eliminate islands
        st_exteriorring(

            -- Create a 20 meter buffer
            st_buffer(
                st_transform(
                    st_union(the_geom), 4326) :: geography,
                20
            ) :: geometry
        )
    ) as geom
from
    ways
where
    gid in (
        select
            distinct edge
        from
            b
    )

13.44

with start as (
    select
        source
    from
        ways
        join configuration using (tag_id)
    where
        configuration.penalty <= 1
    order by
        the_geom <-> st_setsrid(
            st_geomfromtext('POINT (-73.987374 40.722349)'),
            4326
        )
    limit
        1
), b as (
    select
        *
    from
        pgr_drivingdistance(
            'select 
            gid as id, 
            source, 
            target, 
            cost_s * 2.5 as cost, 
            reverse_cost_s * 2.5 as reverse_cost 
            from ways 
            join configuration 
            using (tag_id) 
            where penalty <= 1',
            (
                select
                    source
                from
                    start
            ),
            600
        )
)
select
    st_makepolygon(
        st_exteriorring(
            st_buffer(
                st_transform(st_union(the_geom), 4326) :: geography,
                20
            ) :: geometry
        )
    ) as geom
from
    ways
where
    gid in (
        select
            distinct edge
        from
            b
    )

14 - Spatial autocorrelation and optimization with Python and PySAL

14.1 Spatial autocorrelation

Shell commands

CREATE EXTENSION plpython3u;
docker container exec -it docker-postgis-db-1 bash
apt update
apt install python3-pip
python3 --version
pip3 --version
apt-get install gdal-bin
apt-get install libgdal-dev
pip3 install esda matplotlib libpysal spopt geopandas scikit-learn --break-system-packages

14.1

CREATE FUNCTION pymax (a integer, b integer) 
RETURNS integer 
AS $$ 
    if a > b: 
        return a 
    return b 
$$ 
LANGUAGE plpython3u;

14.2

select
    pymax(100, 1)

14.3

create function python_limit_5 (tablename TEXT) 
returns text
AS $$
	data = plpy.execute(f'''select * from {tablename} limit 5''')
	return data 
$$
LANGUAGE plpython3u;
create table nyc_neighborhoods_no_geom as
select
    ogc_fid,
    neighborhood,
    boroughcode,
    borough
from
    nyc_neighborhoods

14.5

create or replace function python_limit_5 (tablename TEXT) 
returns text 
AS $$ 
    data = plpy.execute(f'''select * from {tablename} limit 5''') 
    return data[0] 
$$ 
LANGUAGE plpython3u;

14.7

create or replace function python_limit_5 (tablename TEXT) 
returns text 
AS $$ 
    data = plpy.execute(f'''select * from {tablename} limit 5''') 
    return data[0]['neighborhood'] 
$$ 
LANGUAGE plpython3u;

14.8

create
or replace function pysal_esda_test(
    tablename TEXT,
    geom TEXT,
    col TEXT,
    id_col TEXT,
    similarity FLOAT
) 
returns text 
AS $$ 
    import esda 
    import libpysal as lps 
    from libpysal.weights import W 
    import json 
    import numpy as np 
    import pandas as pd 
    
    neighbors = plpy.execute(
        f '''
        select
            json_object_agg(b.{ id_col }, a.neighbors) as neighbors
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col } :: text) as neighbors
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null  
        '''
    ) 
    
    weights = plpy.execute(
        f ''' 
        select
            json_object_agg(b.{ id_col }, a.weights) as weights
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col }) as neighbors,
                    array_fill(
                        (
                            case
                                when count(z.{ id_col }) = 0 then 0
                                else 1 / count(z.{ id_col }) :: numeric
                            end
                        ),
                        array [count(z.{id_col})::int]
                    ) as weights
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null
        '''
    ) 
    return neighbors 
$$ 
LANGUAGE 'plpython3u';

14.17

create
or replace function pysal_esda_test(
    tablename TEXT,
    geom TEXT,
    col TEXT,
    id_col TEXT,
    similarity FLOAT
) 
returns text 
AS $$ 
    import esda 
    import libpysal as lps 
    from libpysal.weights import W 
    import json 
    import numpy as np 
    import pandas as pd 
    
    neighbors = plpy.execute(
        f '''
        select
            json_object_agg(b.{ id_col }, a.neighbors) as neighbors
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col } :: text) as neighbors
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null  
        '''
    ) 
    
    weights = plpy.execute(
        f ''' 
        select
            json_object_agg(b.{ id_col }, a.weights) as weights
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col }) as neighbors,
                    array_fill(
                        (
                            case
                                when count(z.{ id_col }) = 0 then 0
                                else 1 / count(z.{ id_col }) :: numeric
                            end
                        ),
                        array [count(z.{id_col})::int]
                    ) as weights
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null
        '''
    )  
    
    w = W(json.loads(neighbors [0] ['neighbors']), json.loads(weights [0] ['weights']), silence_warnings = True) 
    w.transform = 'r' 
    
    var_data = plpy.execute(
        f '''
        with a as (
            select
                distinct b.{ col },
                b.{ id_col }
            from
                { tablename } b
                cross join lateral (
                    select
                        array_agg(z.{ id_col }) as neighbors
                    from
                        { tablename } z
                    where
                        st_intersects(z.{ geom }, b.{ geom })
                        and z.{ id_col } != b.{ id_col }
                ) a
            where
                a.neighbors is not null
        )
        select
            { col } as data_col
        from
            a
        ''') 
    
    var_list = [] 
    for i in var_data: 
        if i ['data_col'] == None: 
            var_list.append(float(0.0))
        else: 
            var_list.append(float(i ['data_col'])) 
    
    li = esda.moran.Moran_Local(np.array(var_list), w) 
    
    return var_list 
$$ 
LANGUAGE 'plpython3u';

14.22

create or replace function pysal_esda (
    tablename TEXT, 
    geom TEXT, 
    col TEXT, 
    id_col TEXT, 
    similarity FLOAT
)
returns table (
    id TEXT, 
    col FLOAT, 
    local_morani_values FLOAT, 
    p_values FLOAT, 
    quadrant TEXT, 
    geom GEOMETRY
)
AS $$ 
    import esda 
    import libpysal as lps 
    from libpysal.weights import W 
    import json 
    import numpy as np 
    import pandas as pd 
    
    neighbors = plpy.execute(
        f'''
        select
            json_object_agg(b.{id_col}, a.neighbors) as neighbors
        from
            {tablename} b
            cross join lateral (
                select
                    array_agg(z.{id_col} :: text) as neighbors
                from
                    {tablename} z
                where
                    st_intersects(z.{geom}, b.{geom})
                    and z.{id_col} != b.{id_col}
            ) a
        where
            a.neighbors is not null  
        '''
    ) 
    
    weights = plpy.execute(
        f''' 
        select
            json_object_agg(b.{id_col}, a.weights) as weights
        from
            {tablename} b
            cross join lateral (
                select
                    array_agg(z.{id_col}) as neighbors,
                    array_fill(
                        (
                            case
                                when count(z.{id_col}) = 0 then 0
                                else 1 / count(z.{id_col}) :: numeric
                            end
                        ),
                        array [count(z.{id_col})::int]
                    ) as weights
                from
                    {tablename} z
                where
                    st_intersects(z.{geom}, b.{geom})
                    and z.{id_col} != b.{id_col}
            ) a
        where
            a.neighbors is not null
        '''
    )  
    
    w = W(json.loads(neighbors[0]['neighbors']), json.loads(weights[0]['weights']), silence_warnings = True) 
    w.transform = 'r' 
    
    var_data = plpy.execute(
        f'''
        with a as (
            select
                distinct b.{col},
                b.{id_col}
            from
                {tablename} b
                cross join lateral (
                    select
                        array_agg(z.{id_col}) as neighbors
                    from
                        {tablename} z
                    where
                        st_intersects(z.{geom}, b.{geom})
                        and z.{id_col} != b.{id_col}
                ) a
            where
                a.neighbors is not null
        )
        select
            {col} as data_col
        from
            a
        ''') 
    
    var_list = [] 
    for i in var_data: 
        if i['data_col'] == None: 
            var_list.append(float(0.0))
        else: 
            var_list.append(float(i['data_col'])) 
    
    li = esda.moran.Moran_Local(np.array(var_list), w)  
    
    original = plpy.execute(f'''
    with a as (
        select
            distinct b.{col},
            b.{id_col},
            b.{geom}
        from
            {tablename} b
            cross join lateral (
                select
                    array_agg(z.{id_col}) as neighbors
                from
                    {tablename} z
                where
                    st_intersects(z.{geom}, b.{geom})
                    and z.{id_col} != b.{id_col}
            ) a
        where
            a.neighbors is not null
    )
    select
        {id_col},
        {col},
        {geom}
    from
        a'''
    ) 
    
    original_data = [] 
    lookup_table = [] 
    
    for i in original: 
        original_data.append(i) 
        lookup_table.append(i[f'{id_col}']) 

    df = pd.DataFrame.from_dict(original_data) 
    
    formatted_data = [] 
	
    for i in original_data: 
        dict = i 
        res = lookup_table.index(i[f'{id_col}']) 
        dict['local_morani_values'] = li.Is[res] 
        dict['p_values'] = li.p_sim[res] 
        dict['quadrant'] = li.q[res] 
        formatted_data.append(dict) 
    
    original_data_df = pd.DataFrame.from_dict(formatted_data) 
    final = df.merge(original_data_df, how='inner', on=f'{id_col}') 
    final_df = final.drop([f'{col}_x', f'{geom}_x'], axis=1) 
    final_df = final_df.rename(columns = {f'{col}_y': 'col', f'{geom}_y': f'{geom}', f'{id_col}': 'id'}) 
    
    return final_df.to_dict(orient = 'records')
$$ 
LANGUAGE 'plpython3u';

14.23

create table nyc_2021_census_block_groups_morans_i as
select
    *
from
    nys_2021_census_block_groups
where
    left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
    and population > 0

14.24

create table nyc_bgs_esda as
select
    *
from
    pysal_esda(
        'nyc_2021_census_block_groups_morans_i',
        'geom',
        'income',
        'geoid',
        0.05
    )

14 - Spatial autocorrelation and optimization with Python and PySAL

14.1 Spatial autocorrelation

Shell commands

CREATE EXTENSION plpython3u;
docker container exec -it docker-postgis-db-1 bash
apt update
apt install python3-pip
python3 --version
pip3 --version
apt-get install gdal-bin
apt-get install libgdal-dev
pip3 install esda matplotlib libpysal spopt geopandas scikit-learn --break-system-packages

14.1

CREATE FUNCTION pymax (a integer, b integer) 
RETURNS integer 
AS $$ 
    if a > b: 
        return a 
    return b 
$$ 
LANGUAGE plpython3u;

14.2

select
    pymax(100, 1)

14.3

create function python_limit_5 (tablename TEXT) 
returns text
AS $$
	data = plpy.execute(f'''select * from {tablename} limit 5''')
	return data 
$$
LANGUAGE plpython3u;
create table nyc_neighborhoods_no_geom as
select
    ogc_fid,
    neighborhood,
    boroughcode,
    borough
from
    nyc_neighborhoods

14.5

create or replace function python_limit_5 (tablename TEXT) 
returns text 
AS $$ 
    data = plpy.execute(f'''select * from {tablename} limit 5''') 
    return data[0] 
$$ 
LANGUAGE plpython3u;

14.7

create or replace function python_limit_5 (tablename TEXT) 
returns text 
AS $$ 
    data = plpy.execute(f'''select * from {tablename} limit 5''') 
    return data[0]['neighborhood'] 
$$ 
LANGUAGE plpython3u;

14.8

create
or replace function pysal_esda_test(
    tablename TEXT,
    geom TEXT,
    col TEXT,
    id_col TEXT,
    similarity FLOAT
) 
returns text 
AS $$ 
    import esda 
    import libpysal as lps 
    from libpysal.weights import W 
    import json 
    import numpy as np 
    import pandas as pd 
    
    neighbors = plpy.execute(
        f '''
        select
            json_object_agg(b.{ id_col }, a.neighbors) as neighbors
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col } :: text) as neighbors
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null  
        '''
    ) 
    
    weights = plpy.execute(
        f ''' 
        select
            json_object_agg(b.{ id_col }, a.weights) as weights
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col }) as neighbors,
                    array_fill(
                        (
                            case
                                when count(z.{ id_col }) = 0 then 0
                                else 1 / count(z.{ id_col }) :: numeric
                            end
                        ),
                        array [count(z.{id_col})::int]
                    ) as weights
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null
        '''
    ) 
    return neighbors 
$$ 
LANGUAGE 'plpython3u';

14.17

create
or replace function pysal_esda_test(
    tablename TEXT,
    geom TEXT,
    col TEXT,
    id_col TEXT,
    similarity FLOAT
) 
returns text 
AS $$ 
    import esda 
    import libpysal as lps 
    from libpysal.weights import W 
    import json 
    import numpy as np 
    import pandas as pd 
    
    neighbors = plpy.execute(
        f '''
        select
            json_object_agg(b.{ id_col }, a.neighbors) as neighbors
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col } :: text) as neighbors
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null  
        '''
    ) 
    
    weights = plpy.execute(
        f ''' 
        select
            json_object_agg(b.{ id_col }, a.weights) as weights
        from
            { tablename } b
            cross join lateral (
                select
                    array_agg(z.{ id_col }) as neighbors,
                    array_fill(
                        (
                            case
                                when count(z.{ id_col }) = 0 then 0
                                else 1 / count(z.{ id_col }) :: numeric
                            end
                        ),
                        array [count(z.{id_col})::int]
                    ) as weights
                from
                    { tablename } z
                where
                    st_intersects(z.{ geom }, b.{ geom })
                    and z.{ id_col } != b.{ id_col }
            ) a
        where
            a.neighbors is not null
        '''
    )  
    
    w = W(json.loads(neighbors [0] ['neighbors']), json.loads(weights [0] ['weights']), silence_warnings = True) 
    w.transform = 'r' 
    
    var_data = plpy.execute(
        f '''
        with a as (
            select
                distinct b.{ col },
                b.{ id_col }
            from
                { tablename } b
                cross join lateral (
                    select
                        array_agg(z.{ id_col }) as neighbors
                    from
                        { tablename } z
                    where
                        st_intersects(z.{ geom }, b.{ geom })
                        and z.{ id_col } != b.{ id_col }
                ) a
            where
                a.neighbors is not null
        )
        select
            { col } as data_col
        from
            a
        ''') 
    
    var_list = [] 
    for i in var_data: 
        if i ['data_col'] == None: 
            var_list.append(float(0.0))
        else: 
            var_list.append(float(i ['data_col'])) 
    
    li = esda.moran.Moran_Local(np.array(var_list), w) 
    
    return var_list 
$$ 
LANGUAGE 'plpython3u';

14.22

create or replace function pysal_esda (
    tablename TEXT, 
    geom TEXT, 
    col TEXT, 
    id_col TEXT, 
    similarity FLOAT
)
returns table (
    id TEXT, 
    col FLOAT, 
    local_morani_values FLOAT, 
    p_values FLOAT, 
    quadrant TEXT, 
    geom GEOMETRY
)
AS $$ 
    import esda 
    import libpysal as lps 
    from libpysal.weights import W 
    import json 
    import numpy as np 
    import pandas as pd 
    
    neighbors = plpy.execute(
        f'''
        select
            json_object_agg(b.{id_col}, a.neighbors) as neighbors
        from
            {tablename} b
            cross join lateral (
                select
                    array_agg(z.{id_col} :: text) as neighbors
                from
                    {tablename} z
                where
                    st_intersects(z.{geom}, b.{geom})
                    and z.{id_col} != b.{id_col}
            ) a
        where
            a.neighbors is not null  
        '''
    ) 
    
    weights = plpy.execute(
        f''' 
        select
            json_object_agg(b.{id_col}, a.weights) as weights
        from
            {tablename} b
            cross join lateral (
                select
                    array_agg(z.{id_col}) as neighbors,
                    array_fill(
                        (
                            case
                                when count(z.{id_col}) = 0 then 0
                                else 1 / count(z.{id_col}) :: numeric
                            end
                        ),
                        array [count(z.{id_col})::int]
                    ) as weights
                from
                    {tablename} z
                where
                    st_intersects(z.{geom}, b.{geom})
                    and z.{id_col} != b.{id_col}
            ) a
        where
            a.neighbors is not null
        '''
    )  
    
    w = W(json.loads(neighbors[0]['neighbors']), json.loads(weights[0]['weights']), silence_warnings = True) 
    w.transform = 'r' 
    
    var_data = plpy.execute(
        f'''
        with a as (
            select
                distinct b.{col},
                b.{id_col}
            from
                {tablename} b
                cross join lateral (
                    select
                        array_agg(z.{id_col}) as neighbors
                    from
                        {tablename} z
                    where
                        st_intersects(z.{geom}, b.{geom})
                        and z.{id_col} != b.{id_col}
                ) a
            where
                a.neighbors is not null
        )
        select
            {col} as data_col
        from
            a
        ''') 
    
    var_list = [] 
    for i in var_data: 
        if i['data_col'] == None: 
            var_list.append(float(0.0))
        else: 
            var_list.append(float(i['data_col'])) 
    
    li = esda.moran.Moran_Local(np.array(var_list), w)  
    
    original = plpy.execute(f'''
    with a as (
        select
            distinct b.{col},
            b.{id_col},
            b.{geom}
        from
            {tablename} b
            cross join lateral (
                select
                    array_agg(z.{id_col}) as neighbors
                from
                    {tablename} z
                where
                    st_intersects(z.{geom}, b.{geom})
                    and z.{id_col} != b.{id_col}
            ) a
        where
            a.neighbors is not null
    )
    select
        {id_col},
        {col},
        {geom}
    from
        a'''
    ) 
    
    original_data = [] 
    lookup_table = [] 
    
    for i in original: 
        original_data.append(i) 
        lookup_table.append(i[f'{id_col}']) 

    df = pd.DataFrame.from_dict(original_data) 
    
    formatted_data = [] 
	
    for i in original_data: 
        dict = i 
        res = lookup_table.index(i[f'{id_col}']) 
        dict['local_morani_values'] = li.Is[res] 
        dict['p_values'] = li.p_sim[res] 
        dict['quadrant'] = li.q[res] 
        formatted_data.append(dict) 
    
    original_data_df = pd.DataFrame.from_dict(formatted_data) 
    final = df.merge(original_data_df, how='inner', on=f'{id_col}') 
    final_df = final.drop([f'{col}_x', f'{geom}_x'], axis=1) 
    final_df = final_df.rename(columns = {f'{col}_y': 'col', f'{geom}_y': f'{geom}', f'{id_col}': 'id'}) 
    
    return final_df.to_dict(orient = 'records')
$$ 
LANGUAGE 'plpython3u';

14.23

create table nyc_2021_census_block_groups_morans_i as
select
    *
from
    nys_2021_census_block_groups
where
    left(geoid, 5) in ('36061', '36005', '36047', '36081', '36085')
    and population > 0

14.24

create table nyc_bgs_esda as
select
    *
from
    pysal_esda(
        'nyc_2021_census_block_groups_morans_i',
        'geom',
        'income',
        'geoid',
        0.05
    )

14.2 Location allocation

14.25

ogr2ogr \ 
-f PostgreSQL PG:"host=localhost user=docker password=docker dbname=routing port=25432" \
FDNY_Firehouse_Listing.csv \ 
-dialect sqlite -sql \
"select *, makepoint(cast(Longitude as float), cast(Latitude as float), 4326) as geom from FDNY_Firehouse_Listing" \ 
-nln nyc_fire_stations -lco GEOMETRY_NAME=geom

14.26

ogr2ogr \ 
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=routing port=25432" nyc_mappluto_22v3_shp/MapPLUTO.shp \ 
-nln building_footprints -lco GEOMETRY_NAME=geom \
-nlt MULTIPOLYGON -mapFieldType Real=String

14.27

ogr2ogr \
-f PostgreSQL PG:"host=localhost user=docker password=docker \
dbname=routing port=25432" \
-dialect SQLite -sql "SELECT neighborhood, boroughcode, borough, \
ST_Area(st_collect(st_transform(geometry, 3857))) AS area, \
st_collect(geometry) as geom FROM nyc_hoods \
group by neighborhood, boroughcode, borough" \
nyc_hoods.geojson \
-nln nyc_neighborhoods -lco GEOMETRY_NAME=geom -nlt PROMOTE_TO_MULTI

14.28

create table new_stations (name text, geom geometry)

14.29

insert into
    new_stations (name, geom)
values
    (
        'Withers and Woodpoint',
        st_setsrid(st_makepoint(-73.941455, 40.717628), 4326)
    ),
    (
        'Grand and Graham',
        st_setsrid(st_makepoint(-73.943791, 40.711584), 4326)
    ),
    (
        'Berry and N 11th',
        st_setsrid(st_makepoint(-73.9566498, 40.7207395), 4326)
    )

14.30

create table all_stations as
select
    facilityname as name,
    geom
from
    nyc_fire_stations
where

    -- Finding all the stations in our three target neighborhoods
    st_intersects(
        geom,
        (
            select
                st_union(st_transform(geom, 4326))
            from
                nyc_neighborhoods
            where
                neighborhood in (
                    'Williamsburg',
                    'East Williamsburg',
                    'Greenpoint'
                )
        )
    )
union
select
    *
from
    new_stations

14.31

create table bk_blgds as
select
    *
from
    building_footprints
where
    st_intersects(
        st_transform(geom, 4326),

        -- Find all the buildings that are in the three neighborhoods
        -- and within 100 meters
        (
            select
                st_buffer(st_union(geom) :: geography, 100) :: geometry
            from
                nyc_neighborhoods
            where
                neighborhood in (
                    'Williamsburg',
                    'East Williamsburg',
                    'Greenpoint'
                )
        )
    )

14.32

create table fire_odm as 
with starts as (

    -- Aggregate the source way ID into an array for
    -- the closest way ID to each station
    select
        array_agg(z.source)
    from
        all_stations
        cross join lateral (
            select
                source
            from
                ways
                join car_config c using (tag_id)
            where

                -- Here we are picking all the way tags that
                -- are not in this list
                c.tag_value not in (
                    'track',
                    'bridleway',
                    'bus_guideway',
                    'byway',
                    'cycleway',
                    'path',
                    'track',
                    'grade1',
                    'grade2',
                    'grade3',
                    'grade4',
                    'grade5',
                    'unclassified',
                    'footway',
                    'pedestrian',
                    'steps'
                )
            order by
                the_geom <-> all_stations.geom
            limit
                1
        ) z
), 

-- Here we do the same with the destinations for the buildings
destinations as (
    select
        array_agg(z.source)
    from
        bk_blgds
        cross join lateral (
            select
                source
            from
                ways
                join car_config c using (tag_id)
            where
                c.tag_value not in (
                    'track',
                    'bridleway',
                    'bus_guideway',
                    'byway',
                    'cycleway',
                    'path',
                    'track',
                    'grade1',
                    'grade2',
                    'grade3',
                    'grade4',
                    'grade5',
                    'unclassified',
                    'footway',
                    'pedestrian',
                    'steps'
                )
            order by
                ways.the_geom <-> st_transform(st_centroid(bk_blgds.geom), 4326)
            limit
                1
        ) z

    -- We only select the buildings in the neighborhood boundaries
    where
        st_intersects(
            st_transform(geom, 4326),
            (
                select
                    st_buffer(st_union(geom) :: geography, 100) :: geometry
                from
                    nyc_neighborhoods
                where
                    neighborhood in (
                        'williamsburg',
                        'east williamsburg',
                        'greenpoint'
                    )
            )
        )
)
select
    *
from

    -- Here we pass these arguments to the pgr_bdDijkstraCost function
    pgr_bddijkstracost(
        $$
        select
            id,
            source,
            target,
            cost_s as cost,
            reverse_cost_s as reverse_cost
        from
            ways
            join car_config using (tag_id)
        where
            st_intersects(
                st_transform(the_geom, 4326),
                (
                    select
                        st_buffer(st_union(geom) :: geography, 100) :: geometry
                    from
                        nyc_neighborhoods
                    where
                        neighborhood in (
                            'williamsburg',
                            'east williamsburg',
                            'greenpoint'
                        )
                )
            ) $$,

            -- We pass the arrays from above as the two arguments
            (
                select
                    *
                from
                    starts
            ),
            (
                select
                    *
                from
                    destinations
            ),
            true
    );

14.46

create
or replace function spopt_pmedian(
    odm_table TEXT,
    optimal_facilities INT,
    clients_table TEXT,
    facilities_table TEXT
)
returns table (
    facility TEXT,
    end_vid INT,
    ogc_fid INT,
    geom GEOMETRY
) AS $$
    import spopt 
    from spopt.locate import PMedian 
    import numpy 
    import pulp 
    import pandas as pd 

    odm = plpy.execute(f''' select * from {odm_table} ''')
    odm_new = [] 

    for i in odm: 
        odm_new.append({'start_vid': i['start_vid'], 'end_vid': i['end_vid'], 'agg_cost': i['agg_cost']})
        
    solver = pulp.PULP_CBC_CMD(keepFiles=True)

    df = pd.DataFrame.from_dict(odm_new) 
    data = df.pivot_table(index='end_vid', columns='start_vid', values='agg_cost', fill_value=1000000).values 
    vals = df.pivot_table(index='end_vid', columns='start_vid', values='agg_cost', fill_value=1000000)

    clients = data.shape[0] 
    weights = [1] * clients

    pmedian_from_cm = PMedian.from_cost_matrix(
        numpy.array(data), 
        numpy.array(weights),
        p_facilities=int(optimal_facilities), 
        name="p-median-network-distance" ) 

    pmedian_from_cm = pmedian_from_cm.solve(solver)

    station_ids = plpy.execute(f'''
    select
        z.source as end_vid,
        {facilities_table}.name
    from
        {facilities_table}
        cross join lateral (
            SELECT
                source
            FROM
                ways
                join configuration c using (tag_id)
            where
                c.tag_value not in (
                    'track',
                    'bridleway',
                    'bus_guideway',
                    'byway',
                    'cycleway',
                    'path',
                    'track',
                    'grade1',
                    'grade2',
                    'grade3',
                    'grade4',
                    'grade5',
                    'unclassified',
                    'footway',
                    'pedestrian',
                    'steps'
                )
            ORDER BY
                ways.the_geom <-> st_transform(st_centroid({facilities_table}.geom), 4326)
            limit
                1
        ) z
    ''') 

    stations = [] 

    for i in station_ids: 
        stations.append(i) 

    stations_df = pd.DataFrame.from_dict(stations)

    cleaned_points = []

    for i in pmedian_from_cm.fac2cli: 
        if len(i) > 0: 
            z = stations_df[stations_df['end_vid'] == vals.columns[pmedian_from_cm.fac2cli.index(i)]]['name'].values[0] 
            for j in i: 
                struct = {'facility': z, 'end_vid': list(vals.index)[j]} 
                cleaned_points.append(struct)

    df_startids = pd.DataFrame.from_dict(cleaned_points)

    orig_data = plpy.execute(f''' 
    select
        z.source as end_vid,
        {clients_table}.ogc_fid,
        st_transform(st_centroid({clients_table}.geom), 4326) as geom
    from
        {clients_table}
        cross join lateral (
            SELECT
                source
            FROM
                ways
                join car_config c using (tag_id)
            where
                c.tag_value not in (
                    'track',
                    'bridleway',
                    'bus_guideway',
                    'byway',
                    'cycleway',
                    'path',
                    'track',
                    'grade1',
                    'grade2',
                    'grade3',
                    'grade4',
                    'grade5',
                    'unclassified',
                    'footway',
                    'pedestrian',
                    'steps'
                )
            ORDER BY
                ways.the_geom <-> st_transform(st_centroid({clients_table}.geom), 4326)
            limit
                1
        ) z
    ''') 

    orig_formatted = [] 
    for i in orig_data: 
        orig_formatted.append(i) 

    orig_df = pd.DataFrame.from_dict(orig_formatted)

    final_df = orig_df.merge(df_startids, how='left', on='end_vid') 
    final_df = final_df.replace(numpy.nan, None) 

    return final_df.to_dict(orient='records')
$$ 
LANGUAGE 'plpython3u';

14.47

create table bk_final as
select
    *
from
    spopt_pmedian('fire_odm', 5, 'bk_blgds', 'all_stations');

14.48

create table final_stations as
select
    *
from
    all_stations
where
    name in (
        select
            distinct facility
        from
            bk_final
    )

14.3 Build balanced territories

14.59

create
or replace function pysal_skater(
    tablename TEXT,
    geometry TEXT,
    col TEXT,
    id_col TEXT,
    n_clusters INT,
    floor INT
) returns table (id TEXT, col FLOAT, regions INT, geometry TEXT) 
AS $$ 
    import spopt 
    import libpysal as lps 
    import json 
    import numpy as np 
    import pandas as pd 
    import geopandas as gpd
    from sklearn.metrics import pairwise as skm 
    import numpy
    from shapely import wkt 
    
    neighbors = plpy.execute(f'''
    select
        json_object_agg(b.{ id_col }, a.neighbors) as neighbors
    from
        { tablename } b
        cross join lateral (
            select
                array_agg(z.{ id_col } :: text) as neighbors
            from
                { tablename } z
            where
                st_intersects(z.{ geometry }, b.{ geometry })
                and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
                and z.{ id_col } != b.{ id_col }
        ) a
    where
        a.neighbors is not null
    ''')
   
    weights = plpy.execute(f'''
    select
        json_object_agg(b.{ id_col }, a.weights) as weights
    from
        { tablename } b
        cross join lateral (
            select
                array_agg(z.{ id_col }) as neighbors,
                array_fill(
                    (
                        case
                            when count(z.{ id_col }) = 0 then 0
                            else 1 / count(z.{ id_col }) :: numeric
                        end
                    ),
                    array [count(z.{id_col})::int]
                ) as weights
            from
                { tablename } z
            where
                st_intersects(z.{ geometry }, b.{ geometry })
                and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
                and z.{ id_col } != b.{ id_col }
        ) a
    where
        a.neighbors is not null
    ''')
    
    ids = [] 

    for i in json.loads(weights[0]['weights']): 
        ids.append(i) 

    w = lps.weights.W(json.loads(neighbors[0]['neighbors']), json.loads(weights[0]['weights']), silence_warnings = True, id_order=ids) 
    w.transform='r'

    to_gdf = plpy.execute(f'''
    select
        st_astext(st_transform({ geometry }, 4326)) as geometry,
        { col } :: numeric as col,
        { id_col } as id
    from
        { tablename } b
        cross join lateral (
            select
                array_agg(z.{ id_col }) as neighbors,
                array_fill(
                    (
                        case
                            when count(z.{ id_col }) = 0 then 0
                            else 1 / count(z.{ id_col }) :: numeric
                        end
                    ),
                    array [count(z.{id_col})::int]
                ) as weights
            from
                { tablename } z
            where
                st_intersects(z.{ geometry }, b.{ geometry })
                and st_npoints(st_intersection(b.{ geometry }, z.{ geometry })) > 1
                and z.{ id_col } != b.{ id_col }
        ) a
    where
        a.neighbors is not null
    ''')

    gdf_data = [] 

    for i in to_gdf: 
        gdf_data.append(i) 
        
    gdf = gpd.GeoDataFrame(gdf_data)

    spanning_forest_kwds = dict( 
        dissimilarity=skm.manhattan_distances, 
        affinity=None, 
        reduction=numpy.sum, 
        center=numpy.mean, 
        verbose=False
    )

    model = spopt.region.Skater(
        gdf, 
        w, 
        ['col'], 
        n_clusters=n_clusters, 
        floor=floor, 
        trace=False, 
        islands='increase', 
        spanning_forest_kwds=spanning_forest_kwds
    )
    
    model.solve() 
    
    gdf['regions'] = model.labels_
    
    return gdf.to_dict(orient = 'records') 
$$ 
LANGUAGE 'plpython3u';

14.60

create table bklyn_bgs as
select
    *
from
    nyc_2021_census_block_groups
where
    left(right(geoid, 10), 3) = '047'

14.61

create table brklyn_bgs_skater as
select
    *,
    st_geomfromtext(geometry) as geom
from
    pysal_skater('bklyn_bgs', 'geom', 'population', 'geoid', 8, 250)