import argparse
import json
import collections
import datetime
import logging
import operator
import pathlib
import re

import bs4
import psycopg2
import requests
import shapely
from playwright.sync_api import sync_playwright

# Module-level logger; its level and handlers are configured by main().
logger = logging.getLogger(__name__)

# Root of the on-disk cache for downloaded ticket pages (relative to the
# current working directory).
data_dir = pathlib.Path("data/")
# Page the browser automation opens first; it holds the state selector and,
# once a state is chosen, the ticket search form.
search_start_url = (
    "https://www.managetickets.com/derecApp/ticketSearchAndStatusSelector.jsp"
)
# Browser-like User-Agent sent with the plain `requests` downloads of ticket
# pages.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
}


def init_db(db):
    """Run the schema bootstrap script against *db* and commit.

    The script ``sql/managetickets/init.sql`` (resolved against the current
    working directory) is split on ``;`` and every non-blank fragment is
    executed as a separate statement.

    Args:
        db: an open DB-API connection providing ``cursor()`` and ``commit()``.

    Raises:
        FileNotFoundError: if the SQL script does not exist.
    """
    # read_text() opens and closes the file itself, so no handle is leaked.
    script = pathlib.Path("sql/managetickets/init.sql").read_text()
    # NOTE: a plain split on ";" is only safe while the script contains no
    # semicolons inside string literals or function bodies.
    statements = [stmt for stmt in script.split(";") if stmt.strip()]

    with db.cursor() as cur:
        for stmt in statements:
            cur.execute(stmt)

    db.commit()


def available_states():
    """Yield the label of every state offered by the search page.

    Opens the search start page in a headless Firefox, waits for the
    ``select[name="enc"]`` dropdown to become visible and yields the text of
    each ``<option>`` that starts with two to four capital letters or dots
    (which filters out placeholder entries).

    Yields:
        str: the option text exactly as shown in the dropdown.
    """
    # Raw string: "\." in a normal string literal is an invalid escape.
    state_label = re.compile(r"[A-Z\.]{2,4}.*")

    with sync_playwright() as p:
        logger.debug("launch")
        browser = p.firefox.launch(headless=True)
        try:
            page = browser.new_page()
            logger.debug("goto")
            page.goto(search_start_url)
            logger.debug("wait for selector")
            states = page.wait_for_selector('select[name="enc"]', state="visible")

            for option in states.query_selector_all("option"):
                # Fetch the text once; each inner_text() is a browser round trip.
                label = option.inner_text()
                if state_label.match(label):
                    yield label
        finally:
            # Close the browser even if the consumer stops iterating early
            # or the page interaction raises.
            browser.close()


def states_main():
    """Print every searchable state label, one per line, as it is discovered."""
    labels = available_states()
    for label in labels:
        print(label)


def search_state(state, start, company="flock"):
    """Search one state's ticket listing and yield every result row.

    Drives a headless Firefox: selects *state* in the ``enc`` dropdown, fills
    the company and start-date fields, submits the search and then walks the
    result pages by clicking "Next 50 >>>" until no results table or no
    next button is found.

    Args:
        state: label of the state option to select in the dropdown.
        start: date/datetime whose ``strftime`` gives the search start date.
        company: text typed into the company field (defaults to ``"flock"``).

    Yields:
        dict: one row from ``table_to_dicts`` with an added ``"state"`` key.
    """
    with sync_playwright() as p:
        logger.debug("launch")
        browser = p.firefox.launch(headless=True)
        try:
            page = browser.new_page()
            logger.debug(f"goto {search_start_url}")
            page.goto(search_start_url)
            logger.debug(f"select state {state}")
            page.select_option('select[name="enc"]', label=state)

            logger.debug("wait for idle")
            page.wait_for_load_state("networkidle")

            # this might vary per state
            logger.debug("fill form")
            page.fill("input#company", company)
            page.fill("input#startDate", start.strftime("%m/%d/%Y"))

            logger.debug("search")
            page.click('input.button[name="searchbutton2"]')

            while True:
                logger.debug("wait for idle")
                page.wait_for_load_state("networkidle")

                logger.debug("get table")
                table = page.query_selector("table.pure-table")
                if not table:
                    logger.debug("no results, exit")
                    break

                for row in table_to_dicts(table.inner_html()):
                    yield {"state": state, **row}

                button = page.query_selector(
                    'input[type="submit"].rounded8[value="Next 50 >>>"]'
                )
                if not button:
                    logger.debug("no more, exit")
                    break

                logger.debug("click next")
                button.click()
        finally:
            # Runs on normal exhaustion, on errors, and when the consumer
            # abandons the generator, so the browser is always released.
            browser.close()


def table_to_dicts(html):
    soup = bs4.BeautifulSoup(html, "html.parser")

    thead = soup.find("thead")
    tbody = soup.find("tbody")

    keys = [re.sub(" ", "_", elem.text.lower()) for elem in thead.find_all("th")]
    for row in tbody.find_all("tr"):
        out = dict()
        for key, value in zip(keys, row.find_all("td")):
            if key == "ticket_#":
                url = value.find("a")["href"]
                out["url"] = url.removeprefix("./")
                out["ticket_no"] = value.text
            elif key == "company":
                out["excavator_company"] = value.text
            else:
                out[key] = value.text

        yield out


def search_main(db, state, start):
    """Search one or all states and store the result rows.

    Every row yielded by ``search_state`` is inserted into
    ``managetickets.search``; rows whose ``ticket_no`` already exists are
    left untouched. The transaction is committed once per state. "NE" and
    "TX" are skipped with an error log because their searches are not usable.

    Args:
        db: open database connection providing ``cursor()`` and ``commit()``.
        state: a single state label, or ``None`` to search every state
            returned by ``available_states``.
        start: start date passed through to ``search_state``.
    """
    # Loop-invariant, so build it once instead of once per result row.
    insert_stmt = """
INSERT INTO managetickets.search
(url, ticket_no, type, call_date, excavator_company, addr, street, city, state)
VALUES (%(url)s, %(ticket_no)s, %(type)s, %(call_date)s, %(excavator_company)s, %(addr)s, %(street)s, %(city)s, %(state)s)
ON CONFLICT (ticket_no) DO NOTHING;
"""

    if state is None:
        states = list(available_states())
    else:
        states = [state]

    with db.cursor() as cur:
        for current in states:
            if current == "NE":
                logger.error(
                    "NE requires email and cannot be searched here (last checked: 2025-09-25)"
                )
                continue
            elif current == "TX":
                logger.error(
                    "TX search is online but appears to be broken (last checked: 2025-09-25)"
                )
                continue

            # Use the module logger (not the root logger) like the rest of
            # the file, so the record carries this module's name.
            logger.info(f"search {current}")
            for line in search_state(current, start):
                cur.execute(insert_stmt, line)

            db.commit()


def ticket_main(db, ticket_number, refresh_cache=False):
    """Download, parse and store ticket detail pages.

    For each ticket the relative URL is looked up in
    ``managetickets.search``. The page is downloaded into the on-disk cache
    under ``data_dir / "www.managetickets.com"`` unless a cached copy exists,
    then parsed with ``parse_ticket``. When the ticket is not yet stored, or
    *refresh_cache* is set, existing rows for the ticket are deleted from
    ``ticket``, ``status_history`` and ``notified`` and the parsed data is
    inserted. Each ticket is committed separately.

    Args:
        db: open database connection providing ``cursor()`` and ``commit()``.
        ticket_number: a single ticket number, or ``None`` to process every
            ``ticket_no`` listed in ``managetickets.ticket_todo``.
        refresh_cache: when true, re-download the page and replace any
            stored rows for the ticket.

    Raises:
        requests.HTTPError: if a ticket page responds with an error status.
        requests.Timeout: if the server does not answer in time.
    """
    if ticket_number is None:
        with db.cursor() as cur:
            cur.execute("SELECT ticket_no FROM managetickets.ticket_todo")
            tickets = list(map(operator.itemgetter(0), cur.fetchall()))
    else:
        tickets = [ticket_number]

    ticket_stmt = """
INSERT INTO managetickets.ticket
(
ticket_no,
updated_by,
original_call_date,
release_date,
response_due_by,
work_to_begin_by,
expiration_date,
caller_company_name,
caller_contact_name,
caller_contact_phone,
caller_contact_fax,
caller_address,
caller_email,
caller_job_site_contact_name,
caller_job_site_contact_phone,
dig_type_of_work,
dig_work_done_for,
dig_explosives,
dig_trenchless,
dig_permit,
dig_job_number,
dig_site_state,
dig_site_place,
dig_site_address,
dig_site_county,
dig_site_subdivision,
dig_site_intersecting_street,
dig_site_extent_of_work,
dig_site_remarks,
dig_site_nw_lat,
dig_site_nw_lon,
dig_site_se_lat,
dig_site_se_lon,
location,
raw_data
)
VALUES
(
%(ticket_no)s,
%(updated_by)s,
%(original_call_date)s,
%(release_date)s,
%(response_due_by)s,
%(work_to_begin_by)s,
%(expiration_date)s,
%(caller_company_name)s,
%(caller_contact_name)s,
%(caller_contact_phone)s,
%(caller_contact_fax)s,
%(caller_address)s,
%(caller_email)s,
%(caller_job_site_contact_name)s,
%(caller_job_site_contact_phone)s,
%(dig_type_of_work)s,
%(dig_work_done_for)s,
%(dig_explosives)s,
%(dig_trenchless)s,
%(dig_permit)s,
%(dig_job_number)s,
%(dig_site_state)s,
%(dig_site_place)s,
%(dig_site_address)s,
%(dig_site_county)s,
%(dig_site_subdivision)s,
%(dig_site_intersecting_street)s,
%(dig_site_extent_of_work)s,
%(dig_site_remarks)s,
%(dig_site_nw_lat)s,
%(dig_site_nw_lon)s,
%(dig_site_se_lat)s,
%(dig_site_se_lon)s,
ST_SetSRID(ST_GeomFromText(NULLIF(%(location)s, '')), 4326),
%(raw_data)s
)
ON CONFLICT (ticket_no) DO NOTHING
"""

    notified_stmt = """
INSERT INTO managetickets.notified
(
ticket_no,
district,
company_name,
marking_concerns,
damage,
customer_service,
status
)
VALUES
(
%(ticket_no)s,
%(district)s,
%(company_name)s,
%(marking_concerns)s,
%(damage)s,
%(customer_service)s,
%(status)s
)
"""
    status_stmt = """
INSERT INTO managetickets.status_history
(
ticket_no,
date,
type,
district,
company,
status,
notes
)
VALUES
(
%(ticket_no)s,
%(date)s,
%(type)s,
%(district)s,
%(company)s,
%(status)s,
%(notes)s
)
"""
    with db.cursor() as cur:
        for ticket_no in tickets:
            cur.execute(
                "SELECT url FROM managetickets.search WHERE ticket_no = %(ticket_no)s",
                {"ticket_no": ticket_no},
            )
            found = cur.fetchone()
            if found is None:
                # Without a search row there is no URL to fetch; skip the
                # ticket instead of failing on unpacking None.
                logger.error(
                    f"ticket {ticket_no} not found in managetickets.search, skipping"
                )
                continue

            (url,) = found
            real_url = f"https://www.managetickets.com/dcrecApp/{url}"  # dc is hard-coded, in principle these are state-specific so check here if things get weird
            dst = data_dir / "www.managetickets.com" / url
            logger.debug(f"process {real_url}")
            if refresh_cache or not dst.exists():
                logger.info(f"get {real_url}")
                # requests never times out by default; bound the wait so a
                # stalled server cannot hang the whole run.
                req = requests.get(real_url, headers=HEADERS, timeout=30)
                req.raise_for_status()
                dst.parent.mkdir(parents=True, exist_ok=True)
                dst.write_text(req.text)

            html = dst.read_text()
            ticket = parse_ticket(html)
            cur.execute(
                "SELECT COUNT(*) FROM managetickets.ticket WHERE ticket_no = %s",
                (ticket_no,),
            )
            (count,) = cur.fetchone()
            if count == 0 or refresh_cache:
                logger.info(f"insert ticket {ticket_no}")
                # Clear any previous rows first so a refresh replaces the
                # stored data instead of being dropped by ON CONFLICT.
                cur.execute(
                    "DELETE FROM managetickets.ticket WHERE ticket_no = %s",
                    (ticket_no,),
                )
                cur.execute(
                    "DELETE FROM managetickets.status_history WHERE ticket_no = %s",
                    (ticket_no,),
                )
                cur.execute(
                    "DELETE FROM managetickets.notified WHERE ticket_no = %s",
                    (ticket_no,),
                )
                cur.execute(ticket_stmt, ticket.ticket)
                for notified in ticket.notified:
                    cur.execute(notified_stmt, {"ticket_no": ticket_no, **notified})
                for status in ticket.status_history:
                    cur.execute(status_stmt, {"ticket_no": ticket_no, **status})
                db.commit()


# Result of parse_ticket():
#   ticket         - dict of column name -> value for the ticket insert
#   notified       - list of dicts, one per managetickets.notified row
#   status_history - list of dicts, one per managetickets.status_history row
Ticket = collections.namedtuple("Ticket", ["ticket", "notified", "status_history"])

# Maps a normalized page label (lower-cased, trailing ":" removed, spaces
# replaced by "_") to the column name used in the ticket insert. Labels not
# listed here keep their normalized name. The repeated "phone" and "lon"
# labels are not in this table; parse_ticket() resolves them by order of
# appearance.
ticket_remap = {
    "company_name": "caller_company_name",
    "contact_name": "caller_contact_name",
    "fax_phone": "caller_contact_fax",
    "email_address": "caller_email",
    "job_site_contact": "caller_job_site_contact_name",
    "type_of_work": "dig_type_of_work",
    "work_being_done_for": "dig_work_done_for",
    "explosives": "dig_explosives",
    "trenchless": "dig_trenchless",
    "permit": "dig_permit",
    "job_number": "dig_job_number",
    "state": "dig_site_state",
    "place": "dig_site_place",
    "address": "dig_site_address",
    "county": "dig_site_county",
    "subdivision": "dig_site_subdivision",
    "intersecting_street": "dig_site_intersecting_street",
    "extent_of_work": "dig_site_extent_of_work",
    "remarks": "dig_site_remarks",
    "map_coord_nw_lat": "dig_site_nw_lat",
    "se_lat": "dig_site_se_lat",
    # Both label spellings map to the same column.
    "ticket_number": "ticket_no",
    "ticket": "ticket_no",
}


def find_feature(raw):
    """Extract the polygon embedded in a ticket page as a GeoJSON Feature.

    The text between the ``start polygon`` marker and the following
    ``end polygon`` marker is scanned for ``{lat: <number>, lng: <number>}``
    entries, which are handed to ``into_feature`` in order of appearance.

    Args:
        raw: full text of the ticket page.

    Returns:
        dict | None: a GeoJSON Feature, or ``None`` when either marker is
        missing or no coordinates lie between them.

    Raises:
        ValueError: if a captured coordinate is not a valid float.
    """
    start = raw.find("start polygon")
    if start == -1:
        return None

    # Look for the end marker only after the start marker, so an earlier
    # stray occurrence cannot produce an empty slice.
    end = raw.find("end polygon", start)
    if end == -1:
        return None

    # Raw string avoids the invalid "\{" escape. The negated classes stop at
    # the next "," or "}", so several coordinates on one line are captured
    # individually instead of being swallowed by a greedy ".+".
    pairs = re.findall(r"\{lat: ([^,}]+), lng: ([^,}]+)\}", raw[start:end])
    coords = [(float(lat), float(lng)) for lat, lng in pairs]
    if not coords:
        # A polygon cannot be built from nothing.
        return None

    return into_feature(coords, properties={})


def into_feature(coords, properties=None):
    """Wrap a list of ``(lat, lon)`` points in a GeoJSON Polygon Feature.

    The ring is closed by repeating the first point at the end, and each
    point is flipped to ``[lon, lat]`` order. The input list is not modified.

    Args:
        coords: non-empty list of indexable ``(lat, lon)`` points.
        properties: value for the Feature's ``properties``; an empty dict
            when omitted.

    Returns:
        dict: Feature with one Polygon geometry holding a single ring.
    """
    props = {} if properties is None else properties

    closed = coords + [coords[0]]
    ring = [[point[1], point[0]] for point in closed]

    geometry = {
        "type": "Polygon",
        "coordinates": [ring],
    }
    return {
        "type": "Feature",
        "geometry": geometry,
        "properties": props,
    }


def parse_ticket(html):
    """Parse a ticket detail page into a ``Ticket``.

    Every ``div.pure-u-1-1`` holding a ``display-line-label`` element and a
    ``display-line`` element contributes one field. The label is normalized
    (lower-cased, trailing ``:`` removed, spaces to underscores) and renamed
    through ``ticket_remap``. The labels ``phone`` and ``lon`` occur more
    than once on a page and are told apart by order: the first phone is the
    caller contact and later ones the job-site contact; the first lon is the
    NW corner and later ones the SE corner.

    When the page contains a polygon it is stored as WKT under
    ``location``. Any expected column not found on the page is set to
    ``None``. The untouched label/value texts are kept as a JSON string
    under ``raw_data``.

    Returns:
        Ticket: ``ticket`` is the field dict; ``notified`` and
        ``status_history`` are empty lists.
    """
    document = bs4.BeautifulSoup(html, "html.parser")

    fields = {}
    raw_pairs = {}
    phone_seen = False
    lon_seen = False

    for block in document.find_all("div", {"class": "pure-u-1-1"}):
        label = block.find(["td", "span"], {"class": "display-line-label"})
        if label is None:
            continue

        name = label.text.lower().rstrip(":").replace(" ", "_")
        name = ticket_remap.get(name, name)

        # The flags are updated before the value check below, so a label
        # without a value still counts as "seen".
        if name == "phone":
            name = (
                "caller_job_site_contact_phone"
                if phone_seen
                else "caller_contact_phone"
            )
            phone_seen = True

        if name == "lon":
            name = "dig_site_se_lon" if lon_seen else "dig_site_nw_lon"
            lon_seen = True

        content = block.find(["td", "span"], {"class": "display-line"})
        if content is None:
            continue

        fields[name] = content.text.strip()
        raw_pairs[label.text] = content.text

    feature = find_feature(html)
    if feature:
        fields["location"] = shapely.geometry.shape(feature).wkt

    expected_columns = {
        "ticket_no",
        "updated_by",
        "original_call_date",
        "release_date",
        "response_due_by",
        "work_to_begin_by",
        "expiration_date",
        "caller_company_name",
        "caller_contact_name",
        "caller_contact_phone",
        "caller_contact_fax",
        "caller_address",
        "caller_email",
        "caller_job_site_contact_name",
        "caller_job_site_contact_phone",
        "dig_type_of_work",
        "dig_work_done_for",
        "dig_explosives",
        "dig_trenchless",
        "dig_permit",
        "dig_job_number",
        "dig_site_state",
        "dig_site_place",
        "dig_site_address",
        "dig_site_county",
        "dig_site_subdivision",
        "dig_site_intersecting_street",
        "dig_site_extent_of_work",
        "dig_site_remarks",
        "dig_site_nw_lat",
        "dig_site_nw_lon",
        "dig_site_se_lat",
        "dig_site_se_lon",
        "location",
    }
    for column in expected_columns:
        if column in fields:
            continue

        logger.debug(f"missing key: {column}")
        fields[column] = None

    fields["raw_data"] = json.dumps(raw_pairs)

    return Ticket(ticket=fields, status_history=[], notified=[])


def main():
    parser = argparse.ArgumentParser("scrape data from managetickets")
    parser.add_argument("--verbose", action="store_true")
    subparsers = parser.add_subparsers(dest="command", required=True)

    parser_search = subparsers.add_parser("search", help="search state pages")
    parser_search.add_argument(
        "--start",
        type=datetime.datetime.fromisoformat,
        default=(datetime.datetime.now() - datetime.timedelta(days=30)),
        help="start date for search",
    )
    parser_search.add_argument("--all", action="store_true", help="start at 2017-01-01")
    parser_search.add_argument(
        "--state", default=None, help="single state to search, default is all"
    )

    parser_tickets = subparsers.add_parser("ticket", help="retrieve ticket information")
    parser_tickets.add_argument(
        "--refresh-cache", action="store_true", help="replace cache"
    )
    parser_tickets.add_argument("--ticket", type=int, help="ticket number to process")

    parser_states = subparsers.add_parser(
        "states", help="list available states to search"
    )

    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    db = psycopg2.connect(
        host="localhost", database="flock_utilities", user="xxx", password="xxx"
    )
    init_db(db)

    match args.command:
        case "search":
            if args.all:
                start = datetime.datetime.fromisoformat("2017-01-01")
            else:
                start = args.start

            search_main(db, args.state, start)
        case "ticket":
            ticket_main(db, args.ticket, args.refresh_cache)
        case "states":
            states_main()
        case _:
            print("unknown command {_}")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
