01 — Download CGS Landslide Inventory to GeoPandas

Contents

01 — Download CGS Landslide Inventory to GeoPandas#

This notebook fetches features from the California Geological Survey (CGS) Landslide Inventory DC1 (Younger) ArcGIS MapServer and loads them into GeoPandas dataframes.

links:

Configuration#

from __future__ import annotations
import math
import json
from typing import List, Dict, Any, Optional
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape

SERVICE = "https://gis.conservation.ca.gov/server/rest/services/CGS/LandslideInventory_DC2/MapServer"

def list_layers(service_root: str) -> pd.DataFrame:
    r = requests.get(f"{service_root}?f=pjson", timeout=60)
    r.raise_for_status()
    js = r.json()
    rows = []
    for L in js.get("layers", []):
        rows.append({
            "id": L.get("id"),
            "name": L.get("name"),
            "geometryType": L.get("geometryType"),
            "minScale": L.get("minScale"),
            "maxScale": L.get("maxScale"),
            "maxRecordCount": L.get("maxRecordCount"),
            "queryFormats": L.get("supportedQueryFormats"),
        })
    return pd.DataFrame(rows).sort_values("id").reset_index(drop=True)

layers_df = list_layers(SERVICE)
layers_df

	id	name	geometryType	minScale	maxScale	maxRecordCount	queryFormats
0	0	CGS Mapped, Needs Review or Older Mapping Stan...	None	0	17000	None	None
1	1	Landslide (Single Feature) Point	esriGeometryPoint	75000	17000	None	None
2	2	Landslide (Deposit) Point	esriGeometryPoint	75000	17000	None	None
3	3	Landslide (Source) Point	esriGeometryPoint	75000	17000	None	None
4	4	Landslide (Single Feature) Line	esriGeometryPolyline	75000	17000	None	None
5	5	Landslide (Deposit) Line	esriGeometryPolyline	75000	17000	None	None
6	6	Landslide (Source) Line	esriGeometryPolyline	75000	17000	None	None
7	7	Landslide (Single Feature)	esriGeometryPolygon	0	300000	None	None
8	8	Landslide (Single Feature-Questionable)	esriGeometryPoint	50000	0	None	None
9	9	Landslide (Single Feature)	esriGeometryPolygon	300000	0	None	None
10	10	Landslide (Source/Scarp)	esriGeometryPolygon	0	300000	None	None
11	11	Landslide (Source/Scarp-Questionable)	esriGeometryPoint	50000	0	None	None
12	12	Landslide (Source/Scarp)	esriGeometryPolygon	300000	0	None	None
13	13	Landslide (Deposit)	esriGeometryPolygon	0	300000	None	None
14	14	Landslide (Deposit-Questionable)	esriGeometryPoint	50000	0	None	None
15	15	Landslide (Deposit)	esriGeometryPolygon	300000	0	None	None

Download selected layers into GeoPandas#

LAYER_IDS = [13]        # both deposit layers
BATCH = 1000
OUT_SRID = 3857

def fetch_page_geojson(service, layer_id, offset, limit, out_srid=3857):
    url = f"{service}/{layer_id}/query"
    payload = {
        "where": "1=1",
        "outFields": "*",
        "returnGeometry": "true",
        "resultOffset": str(offset),
        "resultRecordCount": str(min(limit, 1000)),
        "outSR": str(out_srid),
        "f": "geojson",
    }
    r = requests.post(url, data=payload, timeout=120)
    r.raise_for_status()
    return r.json()

all_gdfs = {}
for lid in LAYER_IDS:
    print(f"\nLayer {lid}: downloading …")
    pages = []
    offset = 0
    while True:
        gj = fetch_page_geojson(SERVICE, lid, offset=offset, limit=BATCH, out_srid=OUT_SRID)
        # Build a GDF directly from the page (handles geometry correctly)
        page_gdf = gpd.GeoDataFrame.from_features(gj, crs=f"EPSG:{OUT_SRID}")
        got = len(page_gdf)
        if got == 0:
            break
        page_gdf["source_layer_id"] = lid  # keep provenance
        pages.append(page_gdf)

        offset += got
        print(f"  fetched {got} (total {offset})")

        # ArcGIS: exceededTransferLimit=True means there are more records
        if not gj.get("exceededTransferLimit", False):
            break

    gdf = pd.concat(pages, ignore_index=True) if pages else gpd.GeoDataFrame(
        geometry=gpd.GeoSeries(dtype="geometry"), crs=f"EPSG:{OUT_SRID}"
    )
    print(f"Layer {lid}: final rows = {len(gdf)}")
    all_gdfs[lid] = gdf


# # Merge 13 + 15 into a single deposits GDF
# gdf_deposits = pd.concat([all_gdfs.get(13, gpd.GeoDataFrame()),
#                           all_gdfs.get(15, gpd.GeoDataFrame())],
#                          ignore_index=True)
gdf_13 = all_gdfs.get(13, gpd.GeoDataFrame())

# print("\nMerged deposits rows:", len(gdf_deposits))
# gdf_deposits.head()
print("\nMerged deposits rows:", len(gdf_13))
gdf_13.head()

Layer 13: downloading …
  fetched 1000 (total 1000)
  fetched 1000 (total 2000)
  fetched 1000 (total 3000)
  fetched 1000 (total 4000)
  fetched 1000 (total 5000)
  fetched 1000 (total 6000)
  fetched 1000 (total 7000)
  fetched 1000 (total 8000)
  fetched 1000 (total 9000)
  fetched 1000 (total 10000)
  fetched 1000 (total 11000)
  fetched 1000 (total 12000)
  fetched 909 (total 12909)
Layer 13: final rows = 12909

Merged deposits rows: 12909

C:\Users\loicb\AppData\Local\Temp\ipykernel_14772\2163885549.py:42: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  gdf = pd.concat(pages, ignore_index=True) if pages else gpd.GeoDataFrame(

	geometry	sec_geol_unit_map_symb	SymSize	SHAPE.STArea()	SHAPE.STLength()	OBJECTID	creation_date	revision_date	geom_rev_date	geom_rev_staff	...	superseded	citable_product_url	gis_source	created_user	created_date	last_edited_user	last_edited_date	FeatID	display_class	source_layer_id
0	POLYGON ((-13276650.321 4068567.489, -13276652...	None	2.858618	2299.895764	268.182677	1965	1362700800000	None	None	None	...	N	None	G:\CGS\GM_Work\Landslide Inventories\StateLand...	None	NaN	None	NaN	1965	2	13
1	POLYGON ((-13276837.792 4076551.465, -13276826...	None	3.883193	9061.502229	777.839423	2611	1367366400000	None	None	None	...	N	None	G:\CGS\GM_Work\Landslide Inventories\StateLand...	None	NaN	None	NaN	2611	2	13
2	POLYGON ((-13571751.348 4448987.266, -13571747...	None	5.290792	4525.327447	285.107144	3970	1450889472000	None	None	None	...	N		G:\CGS\GM_Work\Landslide Inventories\StateLand...	NROTH	1.450983e+12	NROTH	1.450983e+12	3970	2	13
3	POLYGON ((-13572747.205 4449035.828, -13572760...	None	9.967204	23680.277869	791.939850	3971	1450889472000	None	None	None	...	N		G:\CGS\GM_Work\Landslide Inventories\StateLand...	NROTH	1.450983e+12	NROTH	1.450983e+12	3971	2	13
4	POLYGON ((-13572223.032 4449034.498, -13572217...	None	4.277160	2231.315528	173.893848	3972	1450889472000	None	None	None	...	N		G:\CGS\GM_Work\Landslide Inventories\StateLand...	NROTH	1.450983e+12	NROTH	1.450983e+12	3972	2	13

5 rows × 50 columns

Quick peek#

# Show a few columns; adjust as needed
for lid, gdf in all_gdfs.items():
    print(f"\nLayer {lid} — head():\n")
    display(gdf.head(3))
    print(gdf.crs)

Layer 13 — head():

	geometry	sec_geol_unit_map_symb	SymSize	SHAPE.STArea()	SHAPE.STLength()	OBJECTID	creation_date	revision_date	geom_rev_date	geom_rev_staff	...	superseded	citable_product_url	gis_source	created_user	created_date	last_edited_user	last_edited_date	FeatID	display_class	source_layer_id
0	POLYGON ((-13276650.321 4068567.489, -13276652...	None	2.858618	2299.895764	268.182677	1965	1362700800000	None	None	None	...	N	None	G:\CGS\GM_Work\Landslide Inventories\StateLand...	None	NaN	None	NaN	1965	2	13
1	POLYGON ((-13276837.792 4076551.465, -13276826...	None	3.883193	9061.502229	777.839423	2611	1367366400000	None	None	None	...	N	None	G:\CGS\GM_Work\Landslide Inventories\StateLand...	None	NaN	None	NaN	2611	2	13
2	POLYGON ((-13571751.348 4448987.266, -13571747...	None	5.290792	4525.327447	285.107144	3970	1450889472000	None	None	None	...	N		G:\CGS\GM_Work\Landslide Inventories\StateLand...	NROTH	1.450983e+12	NROTH	1.450983e+12	3970	2	13

3 rows × 50 columns

EPSG:3857

Optional: save to files#

from pathlib import Path

SAVE_GEOJSON_DIR = "../data/"

if SAVE_GEOJSON_DIR:
    outdir = Path(SAVE_GEOJSON_DIR)
    outdir.mkdir(parents=True, exist_ok=True)
    for lid, gdf in all_gdfs.items():
        out_path = outdir / f"cgs_DC2{lid}.geojson"
        gdf.to_file(out_path, driver="GeoJSON")
        print("Wrote", out_path)

Wrote ..\Data\cgs_DC213.geojson

Next steps (separate notebook)#

Rename columns to your canonical schema (material, movement, confidence, etc.).
Add PGA values from your raster/contours.
Push to PostGIS (e.g., GeoDataFrame.to_postgis(...) via SQLAlchemy/psycopg2).