import csv
import datetime
import json
import logging
import shutil
import time
from copy import deepcopy
from urllib.parse import urlparse, parse_qs

from playwright.sync_api import sync_playwright, Request, Page, Response

from configs.idp_rules import idp_rules
from modules.browser.browser import PlaywrightBrowser, PlaywrightHelper
from modules.helper.tmp import TmpHelper
from modules.helper.url import URLHelper

# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Private copy of the imported rules, so the additions below do not modify
# the object exported by configs.idp_rules.
idp_rules_with_generic = deepcopy(idp_rules)

# Passive login request rule for Facebook (requests below /x/oauth/).
idp_rules_with_generic["FACEBOOK"]["passive_login_request_rule"] = dict(
    domain="facebook\\.com$",
    path="/x/oauth/",
    params=[
        dict(name="^(client_id|app_id)$", value=".*"),
    ],
)

# Catch-all provider: any domain and path, identified only by its
# client-id-like and redirect-uri-like query parameters. It has no passive rule.
idp_rules_with_generic["GENERIC"] = dict(
    login_request_rule=dict(
        domain=".*",
        path=".*",
        params=[
            dict(name="^(client_id|clientid|app_id|appid)$", value=".*"),
            dict(name="^(redirect_uri|redirecturi)$", value=".*"),
        ],
    ),
    passive_login_request_rule={},
)

# Keys pressed one after another while simulating user activity.
user_actions = [
    "PageDown",
    "PageUp",
    "ArrowRight",
    "ArrowLeft",
    "ArrowDown",
    "ArrowUp",
]


class PrivacyTrace:
    def __init__(self, config: dict):
        """Prepare a trace for one task.

        Reads ``artifact_config`` (``store_screenshot`` and ``store_har``),
        ``profile_directory``, ``task`` and ``browser_config`` from *config*.
        All detection results, artifacts and the error list start out empty.
        """
        artifacts = config['artifact_config']
        self.store_screenshot = artifacts['store_screenshot']
        self.store_har = artifacts['store_har']
        self.profile_directory = config["profile_directory"]
        # task[0] is the URL that gets visited, task[1] the identity provider name.
        self.task = config["task"]

        # Detection state, filled in by the request/response handlers.
        self.found_lreq = None
        self.full_leak = None
        self.full_leak_type = None
        self.user_action_performed = False

        self.browser_config = config['browser_config']

        # Artifacts and non-fatal errors gathered while running.
        self.screenshot = None
        self.har = None
        self.errors = []

    @staticmethod
    def schedule(scan_config: dict, analysis_config: dict):
        with open(scan_config['privacy_scan_csv']) as f:
            reader = csv.reader(f)
            for row in reader:
                if (scan_config['scan_for_google'] and row[1] == "GOOGLE") or (
                        scan_config['scan_for_facebook'] and row[1] == "FACEBOOK") or (
                        scan_config['scan_for_microsoft'] and row[1] == "MICROSOFT") or (
                        scan_config['scan_for_newscorpaustralia'] and row[1] == "GENERIC" and
                        urlparse(row[2]).netloc == "login.newscorpaustralia.com"):
                    c = deepcopy(analysis_config)
                    c['profile_directory'] = scan_config['profile_directory']
                    c["task"] = row
                    yield c

    def interceptor(self, req: Request):
        if self.found_lreq is None:
            # Currently we did not verify the login request. Therefore, we can not get a full leak
            for idp in idp_rules_with_generic:
                if idp != self.task[1] or self.found_lreq is not None:
                    continue
                lreq_rule = idp_rules_with_generic[idp]["login_request_rule"]
                plreq_rule = idp_rules_with_generic[idp]["passive_login_request_rule"]
                if lreq_rule and URLHelper.match_url(req.url, lreq_rule["domain"], lreq_rule["path"],
                                                     lreq_rule["params"]):
                    logger.info("Found a login request for %s", self.task[0])
                    # self.found_lreq = {"idp": idp, "lreq": req.url, "user_action_performed": self.user_action_performed}
                    self.found_lreq = {"idp": idp, "lreq": req.url, "user_action_performed": self.user_action_performed}
                elif plreq_rule and URLHelper.match_url(req.url, plreq_rule["domain"], plreq_rule["path"],
                                                        plreq_rule["params"]):
                    logger.info("Found a **passive** login request for %s", self.task[0])
                    # self.found_lreq = {"idp": idp, "lreq": req.url, "user_action_performed": self.user_action_performed}
                    self.found_lreq = {"idp": idp, "lreq": req.url, "user_action_performed": self.user_action_performed}
        else:
            # We verified the login request. Looking for a full leak
            if self.full_leak:
                return
            params = urlparse(req.url)
            query = parse_qs(params.query)
            if "code" in query or "access_token" in query or "id_token" in query or "accesstoken" in query or "idtoken" in query:
                try:
                    parsed = urlparse(self.found_lreq['lreq'])
                    query = parse_qs(parsed.query)
                    if "redirect_uri" in query and urlparse(query['redirect_uri'][0]).netloc in req.url:
                        logger.info("Full leak detected (auth response) %s", req.url)
                        self.full_leak = req.url
                        self.full_leak_type = "AUTH RESPONSE"
                except:
                    pass  # Not the request we are looking for

    def interceptor_response(self, res: Response):
        if self.found_lreq is None or self.full_leak is not None:
            return
        if self.task[1] == "FACEBOOK" and "/x/oauth/status" in res.url:
            if "fb-ar" in res.headers:
                logger.info("Full leak detected (fb-ar header): %s", res.headers["fb-ar"])
                self.full_leak = res.headers["fb-ar"]
                self.full_leak_type = "FB-AR HEADER"
            elif "fb-s" in res.headers:
                if res.headers["fb-s"] != "unknown":
                    logger.info("Full leak lite detected (fb-s header): %s", res.headers["fb-s"])
                    self.full_leak = res.headers["fb-s"]
                    self.full_leak_type = "FB-S HEADER"

    def check_postmessage_leak(self, req: Request):
        purl = urlparse(req.url)
        pquery = parse_qs(purl.query)
        if purl.netloc == "mock.sso-monitor.me" and req.method == "POST" and req.post_data_json:
            try:
                if urlparse(req.post_data_json['origin']).netloc == "accounts.google.com":
                    try:
                        data = json.loads(req.post_data_json['data'])
                    except:
                        data = req.post_data_json['data']
                    if "params" in data and "type" in data['params'] and data['params'][
                        "type"] == "authResult" and "authResult" in data["params"]:
                        logger.info("Full leak detected (Google post message auth result): %s" % req.post_data_json)
                        self.full_leak = req.post_data_json
                        self.full_leak_type = "GOOGLE POST MESSAGE AUTH RESULT"
                    elif "response" in data and "credential" in data["response"]:
                        logger.info("Full leak detected (Google post message response): %s" % req.post_data_json)
                        self.full_leak = req.post_data_json
                        self.full_leak_type = "GOOGLE POST MESSAGE RESPONSE"
                elif urlparse(req.post_data_json['origin']).netloc == "login.newscorpaustralia.com":
                    data = req.post_data_json['data']
                    if data['type'] == "authorization_response" and "response" in data:
                        if "access_token" in data["response"] or "id_token" in data["response"] or "code" in data[
                            "response"]:
                            logger.info("Full leak detected (post message auth result): %s" % req.post_data_json)
                            self.full_leak = req.post_data_json
                            self.full_leak_type = "POST MESSAGE RESPONSE"
            except:  # Not the request we are looking for
                pass

    def check_location_fragment_leak(self, res: Response):
        if 300 <= res.status < 400:
            if "location" in res.headers:
                try:
                    loc_header = urlparse(res.headers["location"])
                    frag = parse_qs(loc_header.fragment)
                    if "code" in frag or "access_token" in frag or "id_token" in frag or "accesstoken" in frag or "idtoken" in frag:
                        logger.info("Full leak detected (fragment redirect) %s" % res.headers["location"])
                        self.full_leak = res.headers["location"]
                        self.full_leak_type = "FRAGMENT REDIRECT"
                except Exception as e:
                    logger.error(e)
                    self.errors.append({"type": "LOC_HEADER_PARSING", "error": str(e)})

    def check_post_leak(self, req: Request):
        if req.method != "POST" or self.full_leak is not None:
            return
        if urlparse(req.url).netloc == "login.live.com" and (
                "origin" in req.headers and urlparse(req.headers["origin"]).netloc == "login.live.com") or (
                "referer" in req.headers and urlparse(req.headers["referer"]).netloc == "login.live.com"):
            print("Skipping response from login.live.com to itself")
            return
        try:
            parsed = parse_qs(req.post_data)
            if "code" in parsed or "access_token" in parsed or "id_token" in parsed or "accesstoken" in parsed or "idtoken" in parsed:
                logger.info("Full leak detected (form post) %s" % parsed)
                self.full_leak = req.post_data
                self.full_leak_type = "FORM POST"
        except:
            pass  # Not the request we are looking for

    def perform_simulated_user_action(self, page: Page):
        """Imitate a short burst of user activity on *page*.

        Sets ``self.user_action_performed`` first. Then moves the mouse to two
        points, scrolls down and back up in 20 small wheel steps each, and
        presses every key listed in the module-level ``user_actions``, with
        short pauses in between.
        """
        logger.info("Performing simulated user actions")
        self.user_action_performed = True

        # Two mouse movements, each followed by a short pause.
        for x, y in ((23, 55), (35, 121)):
            page.mouse.move(x, y)
            time.sleep(0.1)

        # Scroll down 20 steps, then up again by the same amount.
        for wheel_delta in (10, -10):
            for _ in range(20):
                page.mouse.wheel(0, wheel_delta)
                time.sleep(0.01)
        time.sleep(0.25)

        for key in user_actions:
            page.keyboard.press(key)
            time.sleep(0.25)
        time.sleep(0.5)

    def check_google_logged_in(self, page):
        """Return whether *page* ends up in a signed-in Google session.

        Navigates to ``https://accounts.google.com/``, waits one second and
        reports True when the resulting URL starts with
        ``https://myaccount.google.com/``.
        """
        logger.info("Checking login state of google")
        page.goto("https://accounts.google.com/")
        time.sleep(1)
        signed_in = page.url.startswith("https://myaccount.google.com/")
        logger.info("Logged in state for google: %s", signed_in)
        return signed_in

    def check_facebook_logged_in(self, page):
        """Return whether *page* ends up in a signed-in Facebook session.

        Navigates to ``https://www.facebook.com/settings``, waits one second
        and reports True when the resulting URL still starts with that
        address.
        """
        settings_url = "https://www.facebook.com/settings"
        logger.info("Checking login state of facebook")
        page.goto(settings_url)
        time.sleep(1)
        signed_in = page.url.startswith(settings_url)
        logger.info("Logged in state for facebook: %s", signed_in)
        return signed_in

    def check_microsoft_logged_in(self, page):
        """Return whether *page* ends up in a signed-in Microsoft session.

        Navigates to ``https://account.microsoft.com/profile`` and then polls
        up to three times, three seconds apart, until the URL starts with the
        profile or the account-checkup address. When the check fails, the
        final URL is logged.
        """
        logger.info("Checking login state of microsoft")
        page.goto("https://account.microsoft.com/profile")
        time.sleep(1)

        signed_in_prefixes = (
            "https://account.microsoft.com/profile",
            "https://account.microsoft.com/account-checkup",
        )
        signed_in = False
        for _ in range(3):
            page.wait_for_timeout(3000)
            signed_in = page.url.startswith(signed_in_prefixes)
            logger.info("Check")
            if signed_in:
                break

        logger.info("Logged in state for microsoft: %s", signed_in)
        if not signed_in:
            logger.info(page.url)
        return signed_in

    def check_newscorpaustralia_logged_in(self, page):
        """Always report a signed-in state; *page* is not used.

        No good way to check this login state at the moment.
        """
        return True

    def start(self):
        """Run the trace for ``self.task`` and return everything collected.

        Copies the profile directory into a temporary directory, opens a
        browser on it and, for GOOGLE, FACEBOOK and MICROSOFT tasks, first
        verifies that the profile is logged in. If it is not, the attempt is
        repeated every 30 seconds without limit. Afterwards the request and
        response handlers are registered, ``self.task[0]`` is visited, user
        activity is simulated, and the optional screenshot and HAR are taken.

        Failures during navigation, screenshot, closing and HAR collection are
        logged and recorded in ``self.errors`` instead of being raised.

        Returns:
            dict with the keys ``found_lreq``, ``full_leak``,
            ``full_leak_type``, ``duration`` (whole seconds), ``screenshot``,
            ``har`` and ``errors_while_running``.
        """
        logger.info("Starting privacy trace for %s searching for %s", self.task[0], self.task[1])
        start_time = datetime.datetime.now()
        # Providers for which a login check is run before tracing.
        login_checks = {
            "GOOGLE": self.check_google_logged_in,
            "FACEBOOK": self.check_facebook_logged_in,
            "MICROSOFT": self.check_microsoft_logged_in,
        }
        while True:
            with TmpHelper.tmp_dir() as pdir, TmpHelper.tmp_file() as har_file, sync_playwright() as pw:
                shutil.copytree(self.profile_directory, pdir, dirs_exist_ok=True)
                context, page = PlaywrightBrowser.instance(pw, self.browser_config, pdir,
                                                           har_file if self.store_har else None)
                login_check = login_checks.get(self.task[1])
                if login_check is not None and not login_check(page):
                    logger.warning(
                        "Please be sure to copy a logged in profile to %s. We will retry in 30s",
                        self.profile_directory)
                    PlaywrightHelper.close_context(context)
                    time.sleep(30)
                    continue
                logger.info("Register interceptor")
                context.on("request", self.interceptor)
                context.on("request", self.check_postmessage_leak)
                context.on("response", self.interceptor_response)
                context.on("response", self.check_location_fragment_leak)
                context.on("request", self.check_post_leak)
                try:
                    PlaywrightHelper.navigate(page, self.task[0])
                    self.perform_simulated_user_action(page)
                except Exception as e:
                    logger.error("Navigation and/or user interaction failed: %s", e)
                    self.errors.append({"type": "NAVIGATION_AND_SIMULATED_USER_INPUT", "error": str(e)})

                # The wait itself is unconditional; only the log line depends
                # on whether a full leak has already been seen.
                if not self.full_leak:
                    logger.info("Waiting 10 seconds to enable full leak to be detected")
                time.sleep(10)
                if self.store_screenshot:
                    try:
                        self.screenshot = PlaywrightHelper.take_screenshot(page)
                    except Exception as e:
                        logger.error("Could not take screenshot! %s", e)
                        self.screenshot = str(e)
                        self.errors.append({"type": "SCREENSHOT_TAKING", "error": str(e)})
                try:
                    PlaywrightHelper.close_context(context)
                except Exception as e:
                    logger.error("Could not close context! Trying to manually close context")
                    # Record the error first, and let neither manual close
                    # abort the run, so the HAR and the results collected so
                    # far are still returned.
                    self.errors.append({"type": "CLOSING_CONTEXT", "error": str(e)})
                    for manual_close in (page.close, context.close):
                        try:
                            manual_close()
                        except Exception as close_error:
                            logger.error("Manual close failed: %s", close_error)
                            self.errors.append({"type": "CLOSING_CONTEXT", "error": str(close_error)})
                if self.store_har:
                    try:
                        self.har = PlaywrightHelper.take_har(har_file)
                    except Exception as e:
                        logger.error("Could not take har! %s", e)
                        self.har = str(e)
                        self.errors.append({"type": "TAKING_HAR", "error": str(e)})
                break  # Leave the retry loop after one complete run

        # timedelta.seconds drops whole days; total_seconds() stays correct
        # even when the login retry loop kept this task waiting for long.
        duration_seconds = int((datetime.datetime.now() - start_time).total_seconds())
        logger.info("Finished task in %s seconds", duration_seconds)
        return {
            "found_lreq": self.found_lreq,
            "full_leak": self.full_leak,
            "full_leak_type": self.full_leak_type,
            "duration": duration_seconds,
            "screenshot": self.screenshot,
            "har": self.har,
            "errors_while_running": self.errors
        }


if __name__ == "__main__":
    # Script entry point: opens a browser on the profile directory below and
    # keeps it open until Enter is pressed.
    # NOTE(review): presumably used to log in to the identity providers by
    # hand, producing the logged-in profile that PrivacyTrace.start copies.
    from pathlib import Path

    profile_dir = "/tmp/browser_profile"
    bc = {
        "name": "CHROMIUM",
        "width": 1920,
        "height": 1080,
        "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.29 Safari/537.36",
        "extensions": [
        ],
        "scripts": [],
        "headless": False,
        "locale": "de-DE",
        "wait_for_networkidle": True,
        "timeout_default": 10,
        "timeout_navigation": 10,
        "timeout_networkidle": 2,
        "sleep_after_onload": 2,
        "sleep_after_networkidle": 2
    }

    print("Generating profile to %s" % profile_dir)
    # parents=True so a nested profile path also works if profile_dir changes.
    Path(profile_dir).mkdir(parents=True, exist_ok=True)

    with sync_playwright() as pw:
        context, page = PlaywrightBrowser.instance(pw, bc, profile_dir, None)
        try:
            input("Press 'Enter' if everything is done")
        finally:
            # Close the context even when the prompt is interrupted
            # (Ctrl-C or EOF).
            PlaywrightHelper.close_context(context)
    print("Done")
