create table tree_similarity as
-- Finds the count of all trees in each neighborhood
with a as (
select
count(t.*) as total_trees,
n.neighborhood
from
nyc_2015_tree_census t
join nyc_neighborhoods n on st_intersects(t.geom, n.geom)
group by
n.neighborhood
)
-- Finds the count of each type of tree in each neighborhood
select
n.neighborhood,
(
count(t.*) filter (
where
t.spc_common ilike '%pine%'
) :: numeric / a.total_trees
) as pine,
(
count(t.*) filter (
where
t.spc_common ilike '%maple%'
) :: numeric / a.total_trees
) as maple,
(
count(t.*) filter (
where
t.spc_common ilike '%linden%'
) :: numeric / a.total_trees
) as linden,
(
count(t.*) filter (
where
t.spc_common ilike '%honeylocust%'
) :: numeric / a.total_trees
) as honeylocust,
(
count(t.*) filter (
where
t.spc_common ilike '%oak%'
) :: numeric / a.total_trees
) as oak
-- Joins the above data with data from the original CTE
from
nyc_2015_tree_census t
join nyc_neighborhoods n on st_intersects(t.geom, n.geom)
join a using (neighborhood)
group by
n.neighborhood,
a.total_trees
with a as (
select
*
from
tree_similarity
where
neighborhood = 'Harlem'
)
select
t.neighborhood,
-- Here we subtract the values from our target neighborhood in table "t"
-- which is Harlem, from the average values across the city
t.pine - a.pine as pine_dif,
t.maple - a.maple as maple_dif,
t.oak - a.oak as oak_dif,
t.linden - a.linden as linden_dif,
t.honeylocust - a.honeylocust as honeylocust_dif
from
tree_similarity t,
a
where
t.neighborhood != 'Harlem'
create table harlem_tree_similarity as with a as (
select
*
from
tree_similarity
where
neighborhood = 'Harlem'
),
-- A: Find the difference between Harlem and all other neighborhoods
b as (
select
t.neighborhood,
t.pine - a.pine as pine_dif,
t.maple - a.maple as maple_dif,
t.oak - a.oak as oak_dif,
t.linden - a.linden as linden_dif,
t.honeylocust - a.honeylocust as honeylocust_dif
from
tree_similarity t,
a
where
t.neighborhood != 'Harlem'
),
-- B: Find the min and max values in each column and store it as an array
c as (
select
array [min(pine_dif), max(pine_dif)] as pine,
array [min(maple_dif), max(maple_dif)] as maple,
array [min(oak_dif), max(oak_dif)] as oak,
array [min(linden_dif), max(linden_dif)] as linden,
array [min(honeylocust_dif), max(honeylocust_dif)] as honeylocust
from
b
),
-- C: Find the absolute value of each difference value, normalize the data, and subtract that value from 1
d as (
select
b.neighborhood,
1 - (abs(b.pine_dif) - c.pine [1]) / (c.pine [2] - c.pine [1]) as pine_norm,
1 - (b.maple_dif - c.maple [1]) / (c.maple [2] - c.maple [1]) as maple_norm,
1 - (b.oak_dif - c.oak [1]) / (c.oak [2] - c.oak [1]) as oak_norm,
1 - (b.linden_dif - c.linden [1]) / (c.linden [2] - c.linden [1]) as linden_norm,
1 - (b.honeylocust_dif - c.honeylocust [1]) / (c.honeylocust [2] - c.honeylocust [1]) as honeylocust_norm
from
b,
c
)
-- D: Add up and divide the values
select
neighborhood,
(
pine_norm + maple_norm + oak_norm + linden_norm + honeylocust_norm
) / 5 as final
from
d
order by
2 desc