#!/usr/bin/env python3
#
# generate-fake-report-api-data.py
#
# Synthetic sample data for Vipps MobilePay Report API v2 "dates" endpoint.
# Writes gzipped JSON payloads per date for topics: funds and fees.

import argparse, base64, datetime as dt, gzip, json, os, random, string
from collections import defaultdict, deque

# strftime pattern used for each item's "time" field: ISO-8601 style with
# microsecond precision and a literal trailing "Z".
ISO = "%Y-%m-%dT%H:%M:%S.%fZ"

def dtrange(start, end):
    """Yield every day from *start* through *end*, both inclusive.

    Yields nothing when *end* lies before *start*.
    """
    span_days = (end - start).days
    for offset in range(span_days + 1):
        yield start + dt.timedelta(days=offset)

def rand_time_on(date, start_h=6, end_h=23):
    """Return a random UTC datetime on *date* with its hour in [start_h, end_h).

    The hour, minute, second and microsecond are drawn in that order from the
    module-level ``random`` generator.
    """
    hour = random.randint(start_h, end_h - 1)
    minute, second = random.randint(0, 59), random.randint(0, 59)
    micro = random.randint(0, 999_999)
    return dt.datetime(
        year=date.year,
        month=date.month,
        day=date.day,
        hour=hour,
        minute=minute,
        second=second,
        microsecond=micro,
        tzinfo=dt.timezone.utc,
    )

def b64_cursor(s: str) -> str:
    """Return the Base64 text form of *s* (encoded as UTF-8)."""
    raw = s.encode("utf-8")
    encoded = base64.b64encode(raw)
    return encoded.decode("ascii")

def gen_reference():
    """Return a random order reference: ``order-`` plus 12 lowercase alphanumerics."""
    alphabet = string.ascii_lowercase + string.digits
    suffix = "".join(random.choices(alphabet, k=12))
    return f"order-{suffix}"

def gen_psp_ref(prefix=""):
    """Return a random 10-digit string, prepended with *prefix* when one is given."""
    number = "".join(random.choices(string.digits, k=10))
    if not prefix:
        return number
    return f"{prefix}{number}"

def masked_phone():
    """Return a masked phone number: ``xxxx`` followed by four random digits."""
    visible = "".join(random.choices(string.digits, k=4))
    return f"xxxx {visible}"

def rand_name():
    """Return a random "<first> <last>" name from fixed first- and last-name pools."""
    given_names = (
        "John", "Jane", "Alex", "Chris", "Emma", "Noah", "Liam", "Olivia",
        "Ava", "Mia", "Lucas", "Sofie", "Ella", "Filip", "Sara",
    )
    family_names = (
        "Doe", "Smith", "Hansen", "Johansen", "Olsen", "Andersen", "Larsen",
        "Nilsen", "Karlsen", "Eriksen",
    )
    # The first name is drawn before the last name.
    given = random.choice(given_names)
    family = random.choice(family_names)
    return " ".join((given, family))

def amount_ore():
    """Return a random payment amount in øre as an int.

    Drawn from a triangular distribution over 2000..500000 øre
    (20.00-5000.00 NOK) with its mode at 35000 øre (350.00 NOK).
    """
    sample = random.triangular(low=2000, high=500000, mode=35000)
    return int(round(sample))

def capture_fee_ore(amount, rate=0.0175, fixed=150):
    """Return the capture fee in øre for *amount* (also in øre).

    The fee is the percentage part (``amount * rate``, rounded to a whole øre)
    plus the *fixed* part. Defaults model 1.75% + NOK 1.50.
    """
    percentage_part = int(round(rate * amount))
    return fixed + percentage_part

def weekday_weight(d: dt.date):
    """Return the relative payment-volume weight for the weekday of *d*.

    Friday and Saturday are slightly busier, Sunday a little quieter.
    """
    # Indexed by date.weekday(): Monday == 0 ... Sunday == 6.
    by_weekday = (
        1.00,  # Mon
        1.05,  # Tue
        1.05,  # Wed
        1.10,  # Thu
        1.20,  # Fri
        1.15,  # Sat
        0.90,  # Sun
    )
    return by_weekday[d.weekday()]

def parse_args():
    """Build the command-line parser and return the parsed arguments.

    ``--start`` and ``--end`` are required; every other option has a default.
    """
    parser = argparse.ArgumentParser(description="Generate synthetic Vipps MobilePay Report API v2 date payloads")
    # (flag, add_argument keyword arguments), registered in this order.
    options = (
        ("--payments", dict(type=int, default=1_000_000)),
        ("--start", dict(type=str, required=True, help="YYYY-MM-DD")),
        ("--end", dict(type=str, required=True, help="YYYY-MM-DD")),
        ("--out", dict(type=str, default="./out")),
        ("--ledger-id", dict(type=str, default="302321")),
        ("--recipient-handle", dict(type=str, default="NO:123455")),
        ("--currency", dict(type=str, default="NOK")),
        ("--seed", dict(type=int, default=42)),
        ("--net-settlement", dict(action="store_true", help="Use net settlement (fees retained). If not set, gross settlement (monthly invoice) is simulated.")),
        ("--include-gdpr", dict(type=float, default=0.10, help="Probability [0..1] that a transaction has GDPR fields populated (name/message/maskedPhoneNo).")),
        ("--tz", dict(type=str, default="Z", help="Timestamp suffix; default Z (UTC-style).")),
    )
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    return parser.parse_args()

def write_day_file(out_dir, topic, date, items, page_cursor="sample-page", has_more=False, try_later=False):
    """Write one day's payload for *topic* as gzipped compact JSON.

    The file is ``<out_dir>/<topic>/<YYYY-MM-DD>.json.gz``; the topic directory
    is created if missing. The payload holds a Base64 cursor built from the
    topic, date and *page_cursor*, the ``hasMore`` / ``tryLater`` flags, and
    *items*. Returns the path that was written.
    """
    topic_dir = os.path.join(out_dir, topic)
    os.makedirs(topic_dir, exist_ok=True)

    day = date.isoformat()
    out_path = os.path.join(topic_dir, f"{day}.json.gz")

    cursor = b64_cursor(f"{topic}:{day}:{page_cursor}")
    payload = {
        "cursor": cursor,
        "hasMore": bool(has_more),
        "tryLater": bool(try_later),
        "items": items
    }
    serialized = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    with gzip.open(out_path, "wt", encoding="utf-8") as handle:
        handle.write(serialized)
    return out_path

def _is_last_day_of_month(date):
    """Return True when *date* is the final calendar day of its month."""
    # The following day is day 1 exactly when *date* closes its month.
    # (Adding 31 days and snapping to day 1 overshoots by a month for 31-day
    # months followed by a shorter one, e.g. 31 Jan -> 3 Mar -> 28 Feb.)
    return (date + dt.timedelta(days=1)).day == 1


def _allocate_per_day(total, weights):
    """Split *total* payments over the days in proportion to *weights*.

    Each share is rounded, then days are nudged round-robin (starting at the
    first day) until the counts sum to exactly *total*. A day that is already
    at zero is skipped when counts must be reduced, so no count goes negative.
    *total* must be >= 0 and *weights* non-empty with a positive sum.
    """
    total_w = sum(weights)
    per_day = [max(0, int(round(total * (w / total_w)))) for w in weights]
    n = len(per_day)
    delta = total - sum(per_day)
    idx = 0
    while delta > 0:
        per_day[idx % n] += 1
        delta -= 1
        idx += 1
    while delta < 0:
        # sum(per_day) > total >= 0 here, so a positive entry always exists.
        if per_day[idx % n] > 0:
            per_day[idx % n] -= 1
            delta += 1
        idx += 1
    return per_day


def main():
    """Generate the synthetic funds and fees ledgers and write one file per day.

    Reads the options from ``parse_args()``, seeds ``random`` with ``--seed``,
    spreads ``--payments`` captures over the date range by weekday weight, and
    for each date builds:

    * funds entries: captures, refunds, dispute retain/return, specials
      (top-up, credit-note, correction), fees-retained (net settlement),
      payout-scheduled and payout-aborted;
    * fees entries: capture-fee, correction, and either fees-retained (net
      settlement) or a month-end fees-invoiced with an occasional credit-note
      (gross settlement).

    Entries are sorted by timestamp, running balances are filled in, and both
    ledgers are written with ``write_day_file``.

    Raises:
        SystemExit: if ``--start`` is after ``--end`` or ``--payments`` is negative.
    """
    args = parse_args()
    random.seed(args.seed)

    start = dt.date.fromisoformat(args.start)
    end = dt.date.fromisoformat(args.end)
    # Explicit checks rather than `assert`, which is stripped under `python -O`.
    if start > end:
        raise SystemExit("Start must be <= end")
    if args.payments < 0:
        raise SystemExit("--payments must be >= 0")

    dates = list(dtrange(start, end))

    # Allocate payments per day by weekday weights (exact total, never negative)
    per_day = _allocate_per_day(args.payments, [weekday_weight(d) for d in dates])

    # State across days
    funds_running_balance = 0
    fees_running_balance = 0

    # Events scheduled for a specific ledger date (refunds, disputes).
    deferred_funds = defaultdict(list)  # date -> list of items (dict)
    # Nothing currently schedules deferred fee entries; kept for symmetry.
    deferred_fees = defaultdict(list)

    # For gross settlement: (year, month) -> total capture-fee sum
    monthly_fees_sum = defaultdict(int)

    # Occasional top-ups / credit-notes / corrections (funds) and corrections (fees)
    special_funds_queue = defaultdict(list)
    special_fees_queue = defaultdict(list)

    tz_suffix = args.tz

    def make_item(psp, time, date, entry_type, reference, currency, amount, recipient, message=None, name=None, phone=None):
        """Build one ledger entry; balanceBefore/After are added after sorting."""
        stamp = time.strftime(ISO)
        # Honour --tz: swap the pattern's trailing "Z" for the requested suffix.
        if tz_suffix != "Z" and stamp.endswith("Z"):
            stamp = stamp[:-1] + tz_suffix
        item = {
            "pspReference": psp,
            "time": stamp,
            "ledgerDate": date.isoformat(),
            "entryType": entry_type,
            "reference": reference,
            "currency": currency,
            "amount": amount,
            "recipientHandle": recipient
        }
        if message is not None:
            item["message"] = message
        if name is not None:
            item["name"] = name
        if phone is not None:
            item["maskedPhoneNo"] = phone
        return item

    def time_on_or_after(target_date, earliest):
        """Return a random time on *target_date* that is later than *earliest*."""
        t = rand_time_on(target_date, 6, 23)
        if t <= earliest:
            # Only possible for same-day events: place the event up to an hour
            # after the capture. Captures are drawn before 23:00, so the result
            # stays on the same calendar day.
            t = earliest + dt.timedelta(seconds=random.randint(1, 3600))
        return t

    # Scenario probabilities (sum <= 1; remainder is plain capture)
    prob_full_refund = 0.08
    prob_partial_refund = 0.07
    prob_dispute = 0.002
    prob_correction = 0.0005

    # Occasionally schedule a top-up / credit-note (funds), regardless of payments
    for d in dates:
        if random.random() < 0.0006:
            # top-up: positive
            amt = random.randint(50_00, 5_000_00)  # 50–5000 NOK
            t = rand_time_on(d, 7, 20)
            special_funds_queue[d].append(("top-up", amt, t))
        if random.random() < 0.0003:
            # credit-note (funds): negative
            amt = -random.randint(50_00, 1_000_00)
            t = rand_time_on(d, 7, 20)
            special_funds_queue[d].append(("credit-note", amt, t))
        if random.random() < prob_correction:
            # correction (funds), small adjustment positive or negative
            amt = random.choice([-1, 1]) * random.randint(50, 5_000)
            t = rand_time_on(d, 8, 22)
            special_funds_queue[d].append(("correction", amt, t))
        if random.random() < prob_correction:
            # correction (fees)
            amt = random.choice([-1, 1]) * random.randint(50, 5_000)
            t = rand_time_on(d, 8, 22)
            special_fees_queue[d].append(("correction", amt, t))

    # To simulate occasional payout-aborted: date -> amount to return
    planned_payout_abort = {}

    # Process each day
    for date, captures_today in zip(dates, per_day):
        funds_items = []
        fees_items = []

        # Add queued specials
        for etype, amt, t in special_funds_queue.get(date, []):
            psp = gen_psp_ref("SPC")
            funds_items.append(make_item(psp, t, date, etype, "", args.currency, amt, args.recipient_handle))
        for etype, amt, t in special_fees_queue.get(date, []):
            psp = gen_psp_ref("SPC")
            fees_items.append(make_item(psp, t, date, etype, "", args.currency, amt, args.recipient_handle))

        # Payments captured today
        for _ in range(captures_today):
            ref = gen_reference()
            cap_amount = amount_ore()
            cap_time = rand_time_on(date, 6, 23)
            psp_cap = gen_psp_ref()

            # GDPR fields?
            msg = name = phone = None
            if random.random() < args.include_gdpr:
                msg = f"Payment for {ref}"
                name = rand_name()
                phone = masked_phone()

            # funds: capture
            funds_items.append(
                make_item(psp_cap, cap_time, date, "capture", ref, args.currency, cap_amount, args.recipient_handle, msg, name, phone)
            )

            # fees: capture-fee for same psp/reference, shortly after the capture
            fee_amount = capture_fee_ore(cap_amount)
            fee_time = cap_time + dt.timedelta(seconds=random.randint(1, 120))
            fees_items.append(
                make_item(psp_cap, fee_time, date, "capture-fee", ref, args.currency, -fee_amount, args.recipient_handle)
            )
            # Track monthly fee totals for gross settlement
            if not args.net_settlement:
                monthly_fees_sum[(date.year, date.month)] += fee_amount

            # Refund scenarios
            r = random.random()
            if r < prob_full_refund:
                # full refund 0-30 days after capture (sometimes same day)
                days_after = random.choices([0,1,2,3,4,5,6,7,14,21,30],[40,20,10,8,6,5,4,3,2,1,1])[0]
                refund_date = date + dt.timedelta(days=days_after)
                if refund_date <= end:
                    # Timestamp falls on the refund's own ledger date, after the capture.
                    refund_time = time_on_or_after(refund_date, cap_time)
                    psp_ref = gen_psp_ref("RFD")
                    deferred_funds[refund_date].append(
                        make_item(psp_ref, refund_time, refund_date, "refund", ref, args.currency, -cap_amount, args.recipient_handle)
                    )
            elif r < prob_full_refund + prob_partial_refund:
                # 1-2 partial refunds totalling 10-90% of amount
                n_parts = random.choice([1, 2])
                total_pct = random.randint(10, 90) / 100.0
                remaining = int(cap_amount * total_pct)
                for i in range(n_parts):
                    parts_left = n_parts - i - 1
                    # Last part takes the rest; earlier parts leave >= 1 øre per remaining part.
                    part = remaining if parts_left == 0 else random.randint(1, remaining - parts_left)
                    remaining -= part
                    days_after = random.choices([0,1,2,3,7,14],[50,20,10,8,7,5])[0]
                    refund_date = date + dt.timedelta(days=days_after)
                    if refund_date <= end:
                        refund_time = time_on_or_after(refund_date, cap_time)
                        psp_ref = gen_psp_ref("RFD")
                        deferred_funds[refund_date].append(
                            make_item(psp_ref, refund_time, refund_date, "refund", ref, args.currency, -part, args.recipient_handle)
                        )

            # Dispute scenario
            if random.random() < prob_dispute:
                # retain disputed funds some days after capture (negative), maybe returned later (positive)
                retain_days = random.randint(5, 25)
                retain_date = date + dt.timedelta(days=retain_days)
                if retain_date <= end:
                    disputed = random.randint(int(cap_amount * 0.2), cap_amount)
                    retain_time = rand_time_on(retain_date, 6, 23)
                    deferred_funds[retain_date].append(
                        make_item(gen_psp_ref("DSP"), retain_time, retain_date, "retained-disputed-capture", ref, args.currency, -disputed, args.recipient_handle)
                    )
                    # Sometimes returned. The return date is always after the
                    # retain date, so it can only be in range when the retain is.
                    if random.random() < 0.6:
                        return_date = retain_date + dt.timedelta(days=random.randint(7, 35))
                        if return_date <= end:
                            return_time = rand_time_on(return_date, 6, 23)
                            deferred_funds[return_date].append(
                                make_item(gen_psp_ref("DSP"), return_time, return_date, "returned-disputed-capture", ref, args.currency, disputed, args.recipient_handle)
                            )

        # Bring in everything scheduled for today. This happens after the
        # capture loop so that same-day refunds created above are included.
        funds_items.extend(deferred_funds.pop(date, []))
        fees_items.extend(deferred_fees.pop(date, []))

        # Settlement-related entries for the day
        if args.net_settlement:
            # Net settlement: fees retained daily.
            # Sum fees on fees ledger today (negative entries)
            fees_sum_today = -sum(i["amount"] for i in fees_items if i["entryType"] == "capture-fee")
            if fees_sum_today > 0:
                t = dt.datetime(date.year, date.month, date.day, 23, 50, 0, 0, tzinfo=dt.timezone.utc)
                psp_fee = gen_psp_ref("FEE")
                # funds: negative
                funds_items.append(
                    make_item(psp_fee, t, date, "fees-retained", "", args.currency, -fees_sum_today, args.recipient_handle)
                )
                # fees: positive
                fees_items.append(
                    make_item(psp_fee, t, date, "fees-retained", "", args.currency, fees_sum_today, args.recipient_handle)
                )
        elif _is_last_day_of_month(date):
            # Gross settlement: invoice on the last day of the month for accumulated fees
            total_fees = monthly_fees_sum.get((date.year, date.month), 0)
            if total_fees > 0:
                t = dt.datetime(date.year, date.month, date.day, 10, 0, 0, 0, tzinfo=dt.timezone.utc)
                psp = gen_psp_ref("INV")
                fees_items.append(
                    make_item(psp, t, date, "fees-invoiced", f"INV-{date.year}{date.month:02d}", args.currency, total_fees, args.recipient_handle)
                )
                # Occasionally a small credit-note on fees
                if random.random() < 0.05:
                    credit = random.randint(100, min(5_000, total_fees))
                    t2 = t + dt.timedelta(hours=1)
                    fees_items.append(
                        make_item(gen_psp_ref("CRN"), t2, date, "credit-note", f"CN-{date.year}{date.month:02d}", args.currency, -credit, args.recipient_handle)
                    )

        # Payout scheduling (funds): pay out whatever the day would leave on the balance
        day_delta = sum(i["amount"] for i in funds_items)
        est_balance_after = funds_running_balance + day_delta
        if est_balance_after > 0:
            payout_amt = -est_balance_after  # bring to ~0
            t = dt.datetime(date.year, date.month, date.day, 22, 0, 0, 0, tzinfo=dt.timezone.utc)
            psp_po = gen_psp_ref("PO")
            funds_items.append(
                make_item(psp_po, t, date, "payout-scheduled", f"PAYOUT-{date.isoformat()}", args.currency, payout_amt, None)
            )
            # Rarely: payout aborted next day (balance returns)
            if random.random() < 0.0008:
                planned_payout_abort[date + dt.timedelta(days=1)] = -payout_amt

        # Apply payout-aborted if planned for this date
        if planned_payout_abort.get(date):
            amt_back = planned_payout_abort.pop(date)
            t = dt.datetime(date.year, date.month, date.day, 8, 30, 0, 0, tzinfo=dt.timezone.utc)
            funds_items.append(
                make_item(gen_psp_ref("POA"), t, date, "payout-aborted", f"PAYOUT-ABORT-{date.isoformat()}", args.currency, amt_back, None)
            )

        # Sort items by time for deterministic balances
        funds_items.sort(key=lambda x: x["time"])
        fees_items.sort(key=lambda x: x["time"])

        # Compute running balances and set balanceBefore/After
        for it in funds_items:
            it["balanceBefore"] = funds_running_balance
            funds_running_balance += it["amount"]
            it["balanceAfter"] = funds_running_balance

        for it in fees_items:
            it["balanceBefore"] = fees_running_balance
            fees_running_balance += it["amount"]
            it["balanceAfter"] = fees_running_balance

        # Write per-day payloads
        write_day_file(args.out, "funds", date, funds_items)
        write_day_file(args.out, "fees", date, fees_items)

    # Done
    print(f"Wrote {len(dates)} funds files and {len(dates)} fees files to: {args.out}")

# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()