01 — Download CGS Landslide Inventory to GeoPandas#

This notebook fetches features from the California Geological Survey (CGS) Landslide Inventory DC1 (Younger) ArcGIS MapServer and loads them into GeoPandas dataframes.

links:

Configuration#

from __future__ import annotations
import math
import json
from typing import List, Dict, Any, Optional
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
SERVICE = "https://gis.conservation.ca.gov/server/rest/services/CGS/LandslideInventory_DC2/MapServer"

def list_layers(service_root: str) -> pd.DataFrame:
    r = requests.get(f"{service_root}?f=pjson", timeout=60)
    r.raise_for_status()
    js = r.json()
    rows = []
    for L in js.get("layers", []):
        rows.append({
            "id": L.get("id"),
            "name": L.get("name"),
            "geometryType": L.get("geometryType"),
            "minScale": L.get("minScale"),
            "maxScale": L.get("maxScale"),
            "maxRecordCount": L.get("maxRecordCount"),
            "queryFormats": L.get("supportedQueryFormats"),
        })
    return pd.DataFrame(rows).sort_values("id").reset_index(drop=True)

layers_df = list_layers(SERVICE)
layers_df
id name geometryType minScale maxScale maxRecordCount queryFormats
0 0 CGS Mapped, Needs Review or Older Mapping Stan... None 0 17000 None None
1 1 Landslide (Single Feature) Point esriGeometryPoint 75000 17000 None None
2 2 Landslide (Deposit) Point esriGeometryPoint 75000 17000 None None
3 3 Landslide (Source) Point esriGeometryPoint 75000 17000 None None
4 4 Landslide (Single Feature) Line esriGeometryPolyline 75000 17000 None None
5 5 Landslide (Deposit) Line esriGeometryPolyline 75000 17000 None None
6 6 Landslide (Source) Line esriGeometryPolyline 75000 17000 None None
7 7 Landslide (Single Feature) esriGeometryPolygon 0 300000 None None
8 8 Landslide (Single Feature-Questionable) esriGeometryPoint 50000 0 None None
9 9 Landslide (Single Feature) esriGeometryPolygon 300000 0 None None
10 10 Landslide (Source/Scarp) esriGeometryPolygon 0 300000 None None
11 11 Landslide (Source/Scarp-Questionable) esriGeometryPoint 50000 0 None None
12 12 Landslide (Source/Scarp) esriGeometryPolygon 300000 0 None None
13 13 Landslide (Deposit) esriGeometryPolygon 0 300000 None None
14 14 Landslide (Deposit-Questionable) esriGeometryPoint 50000 0 None None
15 15 Landslide (Deposit) esriGeometryPolygon 300000 0 None None

Download selected layers into GeoPandas#

LAYER_IDS = [13]        # both deposit layers
BATCH = 1000
OUT_SRID = 3857

def fetch_page_geojson(service, layer_id, offset, limit, out_srid=3857):
    url = f"{service}/{layer_id}/query"
    payload = {
        "where": "1=1",
        "outFields": "*",
        "returnGeometry": "true",
        "resultOffset": str(offset),
        "resultRecordCount": str(min(limit, 1000)),
        "outSR": str(out_srid),
        "f": "geojson",
    }
    r = requests.post(url, data=payload, timeout=120)
    r.raise_for_status()
    return r.json()

all_gdfs = {}
for lid in LAYER_IDS:
    print(f"\nLayer {lid}: downloading …")
    pages = []
    offset = 0
    while True:
        gj = fetch_page_geojson(SERVICE, lid, offset=offset, limit=BATCH, out_srid=OUT_SRID)
        # Build a GDF directly from the page (handles geometry correctly)
        page_gdf = gpd.GeoDataFrame.from_features(gj, crs=f"EPSG:{OUT_SRID}")
        got = len(page_gdf)
        if got == 0:
            break
        page_gdf["source_layer_id"] = lid  # keep provenance
        pages.append(page_gdf)

        offset += got
        print(f"  fetched {got} (total {offset})")

        # ArcGIS: exceededTransferLimit=True means there are more records
        if not gj.get("exceededTransferLimit", False):
            break

    gdf = pd.concat(pages, ignore_index=True) if pages else gpd.GeoDataFrame(
        geometry=gpd.GeoSeries(dtype="geometry"), crs=f"EPSG:{OUT_SRID}"
    )
    print(f"Layer {lid}: final rows = {len(gdf)}")
    all_gdfs[lid] = gdf


# # Merge 13 + 15 into a single deposits GDF
# gdf_deposits = pd.concat([all_gdfs.get(13, gpd.GeoDataFrame()),
#                           all_gdfs.get(15, gpd.GeoDataFrame())],
#                          ignore_index=True)
gdf_13 = all_gdfs.get(13, gpd.GeoDataFrame())

# print("\nMerged deposits rows:", len(gdf_deposits))
# gdf_deposits.head()
print("\nMerged deposits rows:", len(gdf_13))
gdf_13.head()
Layer 13: downloading …
  fetched 1000 (total 1000)
  fetched 1000 (total 2000)
  fetched 1000 (total 3000)
  fetched 1000 (total 4000)
  fetched 1000 (total 5000)
  fetched 1000 (total 6000)
  fetched 1000 (total 7000)
  fetched 1000 (total 8000)
  fetched 1000 (total 9000)
  fetched 1000 (total 10000)
  fetched 1000 (total 11000)
  fetched 1000 (total 12000)
  fetched 909 (total 12909)
Layer 13: final rows = 12909

Merged deposits rows: 12909
C:\Users\loicb\AppData\Local\Temp\ipykernel_14772\2163885549.py:42: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  gdf = pd.concat(pages, ignore_index=True) if pages else gpd.GeoDataFrame(
geometry sec_geol_unit_map_symb SymSize SHAPE.STArea() SHAPE.STLength() OBJECTID creation_date revision_date geom_rev_date geom_rev_staff ... superseded citable_product_url gis_source created_user created_date last_edited_user last_edited_date FeatID display_class source_layer_id
0 POLYGON ((-13276650.321 4068567.489, -13276652... None 2.858618 2299.895764 268.182677 1965 1362700800000 None None None ... N None G:\CGS\GM_Work\Landslide Inventories\StateLand... None NaN None NaN 1965 2 13
1 POLYGON ((-13276837.792 4076551.465, -13276826... None 3.883193 9061.502229 777.839423 2611 1367366400000 None None None ... N None G:\CGS\GM_Work\Landslide Inventories\StateLand... None NaN None NaN 2611 2 13
2 POLYGON ((-13571751.348 4448987.266, -13571747... None 5.290792 4525.327447 285.107144 3970 1450889472000 None None None ... N G:\CGS\GM_Work\Landslide Inventories\StateLand... NROTH 1.450983e+12 NROTH 1.450983e+12 3970 2 13
3 POLYGON ((-13572747.205 4449035.828, -13572760... None 9.967204 23680.277869 791.939850 3971 1450889472000 None None None ... N G:\CGS\GM_Work\Landslide Inventories\StateLand... NROTH 1.450983e+12 NROTH 1.450983e+12 3971 2 13
4 POLYGON ((-13572223.032 4449034.498, -13572217... None 4.277160 2231.315528 173.893848 3972 1450889472000 None None None ... N G:\CGS\GM_Work\Landslide Inventories\StateLand... NROTH 1.450983e+12 NROTH 1.450983e+12 3972 2 13

5 rows × 50 columns

Quick peek#

# Show a few columns; adjust as needed
for lid, gdf in all_gdfs.items():
    print(f"\nLayer {lid} — head():\n")
    display(gdf.head(3))
    print(gdf.crs)
Layer 13 — head():
geometry sec_geol_unit_map_symb SymSize SHAPE.STArea() SHAPE.STLength() OBJECTID creation_date revision_date geom_rev_date geom_rev_staff ... superseded citable_product_url gis_source created_user created_date last_edited_user last_edited_date FeatID display_class source_layer_id
0 POLYGON ((-13276650.321 4068567.489, -13276652... None 2.858618 2299.895764 268.182677 1965 1362700800000 None None None ... N None G:\CGS\GM_Work\Landslide Inventories\StateLand... None NaN None NaN 1965 2 13
1 POLYGON ((-13276837.792 4076551.465, -13276826... None 3.883193 9061.502229 777.839423 2611 1367366400000 None None None ... N None G:\CGS\GM_Work\Landslide Inventories\StateLand... None NaN None NaN 2611 2 13
2 POLYGON ((-13571751.348 4448987.266, -13571747... None 5.290792 4525.327447 285.107144 3970 1450889472000 None None None ... N G:\CGS\GM_Work\Landslide Inventories\StateLand... NROTH 1.450983e+12 NROTH 1.450983e+12 3970 2 13

3 rows × 50 columns

EPSG:3857

Optional: save to files#

from pathlib import Path

SAVE_GEOJSON_DIR = "../data/"

if SAVE_GEOJSON_DIR:
    outdir = Path(SAVE_GEOJSON_DIR)
    outdir.mkdir(parents=True, exist_ok=True)
    for lid, gdf in all_gdfs.items():
        out_path = outdir / f"cgs_DC2{lid}.geojson"
        gdf.to_file(out_path, driver="GeoJSON")
        print("Wrote", out_path)
Wrote ..\Data\cgs_DC213.geojson

Next steps (separate notebook)#

  • Rename columns to your canonical schema (material, movement, confidence, etc.).

  • Add PGA values from your raster/contours.

  • Push to PostGIS (e.g., GeoDataFrame.to_postgis(...) via SQLAlchemy/psycopg2).