import sys
import json
import logging
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
from selectolax.parser import HTMLParser

# --- Logging configuration ---
# Module-wide logger: timestamped INFO-level messages in a single shared format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def get_nested_value(data_dict, keys, default=None):
    """Walk *keys* into a nested dictionary, returning *default* when any
    step is missing, not a dict, or resolves to None."""
    current = data_dict
    for key in keys:
        # Guard clause: bail out as soon as the path leaves dict territory.
        if not isinstance(current, dict):
            return default
        current = current.get(key)
    return default if current is None else current

def _collect_description_images(page, description_url: str) -> list[str]:
    """Navigate *page* to the product-description URL and return the unique
    image URLs found in its HTML.

    Protocol-relative links ('//...') are upgraded to https; anything that
    does not start with 'http' or '//' is discarded.
    """
    logger.info(f"Navigasi ke URL deskripsi: {description_url}")
    page.goto(description_url, wait_until="networkidle", timeout=30000)
    desc_tree = HTMLParser(page.content())
    img_urls = set()
    logger.info("Mengekstrak URL gambar dari HTML deskripsi...")
    for img_node in desc_tree.css('img'):
        img_src = img_node.attributes.get('src')
        if img_src and img_src.startswith(('http', '//')):
            if img_src.startswith('//'):
                img_src = 'https:' + img_src
            img_urls.add(img_src)
    return list(img_urls)


def scrape_product_data(url: str) -> dict | None:
    """
    Fungsi utama untuk scrape data produk dari URL yang diberikan dengan mengekstrak data JSON.

    Extracts the product payload embedded in the page (exposed through
    window.__INIT_DATA_CALLBACK__) using a headless Chromium browser.

    Args:
        url: Product page URL to scrape.

    Returns:
        A dict with keys 'title', 'price', 'image_urls', 'source_url',
        'specifications' and 'description_image_urls', or None on failure
        (timeout, geo-blocked product, missing/invalid JSON payload, or any
        unexpected error).
    """
    logger.info(f"Memulai proses scraping untuk URL: {url}")
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
            # FIX: try/finally guarantees the browser is closed on every exit
            # path; the original only closed it on the success and ban paths,
            # leaking it whenever an exception escaped mid-scrape.
            try:
                page = browser.new_page()

                logger.info("Navigasi ke halaman utama produk...")
                page.goto(url, wait_until="domcontentloaded", timeout=60000)

                # The site installs its data callback asynchronously; wait for
                # it before evaluating, otherwise evaluate() would see undefined.
                logger.info("Menunggu data produk JSON dari __INIT_DATA_CALLBACK__...")
                page.wait_for_function("!!window.__INIT_DATA_CALLBACK__", timeout=30000)

                logger.info("Mengekstrak data produk JSON...")
                js_script = """
                async () => {
                    try {
                        const res = await new Promise(window.__INIT_DATA_CALLBACK__);
                        if (res && res.data && res.data.result) {
                            return res.data.result;
                        }
                        return null;
                    } catch (e) {
                        return { error: e.message };
                    }
                }
                """
                product_json_data = page.evaluate(js_script)

                # FIX: also reject non-dict payloads — the original's
                # `'error' in product_json_data` / `.get('error')` raised
                # TypeError/AttributeError if the page returned e.g. a list,
                # instead of the intended ValueError.
                if (not product_json_data
                        or not isinstance(product_json_data, dict)
                        or 'error' in product_json_data):
                    if isinstance(product_json_data, dict) and product_json_data:
                        error_msg = product_json_data.get('error')
                    else:
                        error_msg = "Data JSON tidak ditemukan."
                    raise ValueError(f"Gagal mendapatkan data JSON produk: {error_msg}")

                # Detect if the product is blocked in the current location
                # (geo-block flag inside the payload's global data).
                is_banned = get_nested_value(product_json_data, ['GLOBAL_DATA', 'globalData', 'bigBossBan'], False)
                if is_banned:
                    ban_tip = get_nested_value(product_json_data, ['GLOBAL_DATA', 'globalData', 'bigBossBanTip'], "Produk tidak tersedia.")
                    logger.error(f"Scraping gagal: {ban_tip}")
                    return None

                logger.info("Berhasil mengekstrak data JSON. Memproses data...")
                data = {
                    'title': get_nested_value(product_json_data, ['GLOBAL_DATA', 'globalData', 'subject'], "Judul tidak ditemukan"),
                    'price': get_nested_value(product_json_data, ['PRICE', 'targetSkuPriceInfo', 'salePriceString'], "Harga tidak ditemukan"),
                    'image_urls': get_nested_value(product_json_data, ['HEADER_IMAGE_PC', 'imagePathList'], []),
                    'source_url': url,
                }

                specs_list = get_nested_value(product_json_data, ['PRODUCT_PROP_PC', 'showedProps'], [])
                data['specifications'] = {spec.get('attrName'): spec.get('attrValue') for spec in specs_list}

                # Description images live on a separate page; failures here are
                # deliberately non-fatal (best-effort, list stays empty).
                data['description_image_urls'] = []
                description_url = get_nested_value(product_json_data, ['DESC', 'pcDescUrl'])
                if description_url:
                    try:
                        data['description_image_urls'] = _collect_description_images(page, description_url)
                    except Exception as e:
                        logger.error(f"Gagal mengambil gambar dari deskripsi: {e}")
                else:
                    logger.warning("URL deskripsi (pcDescUrl) tidak ditemukan dalam data JSON.")

                logger.info(f"Judul: {data['title']}")
                logger.info(f"Harga: {data['price']}")
                logger.info(f"URL Gambar Utama ({len(data['image_urls'])}): Ditemukan {len(data['image_urls'])} gambar")
                logger.info(f"Spesifikasi ({len(data['specifications'])}): Ditemukan {len(data['specifications'])} item spesifikasi")
                logger.info(f"URL Gambar Deskripsi ({len(data['description_image_urls'])}): Ditemukan {len(data['description_image_urls'])} gambar unik")

                logger.info("Proses scraping data tambahan berhasil.")
                return data
            finally:
                browser.close()

    except PlaywrightTimeoutError:
        logger.error(f"Timeout saat scraping {url}. Halaman mungkin terlalu lambat, __INIT_DATA_CALLBACK__ tidak ditemukan, atau struktur halaman berubah.")
        return None
    except Exception as e:
        logger.error(f"Terjadi kesalahan tak terduga: {e}")
        return None

if __name__ == "__main__":
    # Require exactly one CLI argument: the product URL to scrape.
    if len(sys.argv) < 2:
        print("Error: Tolong berikan URL sebagai argumen. Contoh: python3 final_scraper.py <URL>")
    else:
        target_url = sys.argv[1]
        result = scrape_product_data(target_url)
        if result:
            print("--- HASIL SCRAPING ---")
            print(json.dumps(result, indent=4, ensure_ascii=False))
            print("----------------------")
