#!/usr/bin/env python3
"""
Product URL finder for Tokopedia stores
"""

import time
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def _build_chrome_options():
    """Build headless Chrome options that mimic a regular desktop browser.

    Returns:
        Options: configured Selenium Chrome options.
    """
    options = Options()
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    # Hide the usual Selenium automation fingerprints so the site serves
    # the normal storefront page instead of a bot-detection wall.
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    # Real-browser user agent string.
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
    # Run in headless mode to avoid UI issues.
    options.add_argument("--headless")
    return options


def _absolute_product_url(href):
    """Resolve a possibly-relative product href against the Tokopedia base URL.

    Unlike naive string concatenation, urljoin handles '/p/x', 'p/x' and
    already-absolute 'https://...' hrefs correctly (the old code produced
    'https://www.tokopedia.comp/x' for hrefs without a leading slash).
    """
    return urljoin('https://www.tokopedia.com/', href)


def _collect_product_urls(soup, limit=5):
    """Scan a parsed store page for product links.

    Args:
        soup: BeautifulSoup document of the store page.
        limit: maximum number of unique product URLs to collect.

    Returns:
        list[str]: up to *limit* unique absolute product URLs.
    """
    # Common selectors for product links in Tokopedia; tried in order,
    # stopping as soon as *limit* unique URLs are found.
    product_selectors = [
        'a[data-testid="lnkProductContainer"]',
        'a[title]',
        'a[href*="/p/"]',
        '[data-testid="divProductWrapper"] a',
        '.css-1as99lw a',  # Product link class
        '.css-1rn6k16 a'   # Another product link class
    ]

    product_urls = []
    for selector in product_selectors:
        elements = soup.select(selector)
        print(f"Found {len(elements)} elements with selector '{selector}'")

        for element in elements:
            href = element.get('href')
            title = element.get('title') or element.get_text(strip=True)

            # Only anchors whose path contains '/p/' are product pages.
            if href and '/p/' in href:
                full_url = _absolute_product_url(href)
                if full_url not in product_urls:
                    product_urls.append(full_url)
                    print(f"  Product: {title[:50]}... -> {full_url}")
                    if len(product_urls) >= limit:
                        return product_urls
    return product_urls


def _inspect_first_product(driver, product_url):
    """Open *product_url* in *driver* and report review-related elements."""
    driver.get(product_url)
    time.sleep(5)  # crude wait for the JS-rendered product page to settle

    product_soup = BeautifulSoup(driver.page_source, "html.parser")
    print(f"Product page title: {product_soup.title.text if product_soup.title else 'No title'}")

    # Look for review sections by class name ('review' in English pages,
    # 'ulasan' in Indonesian ones).
    review_elements = product_soup.find_all(class_=lambda x: x and ('review' in x.lower() or 'ulasan' in x.lower()))
    print(f"Found {len(review_elements)} review-related elements on product page")

    # Same search over data-testid attributes.
    data_testid_elements = product_soup.find_all(attrs={'data-testid': lambda x: x and ('review' in x.lower() or 'ulasan' in x.lower())})
    print(f"Found {len(data_testid_elements)} data-testid elements with 'review' or 'ulasan'")

    # Show a small sample so the operator can pick a selector.
    for i, elem in enumerate(data_testid_elements[:3]):
        print(f"  Element {i+1}: {elem.name} - {elem.get('data-testid')} - Text: {elem.get_text(strip=True)[:50]}...")


def find_product_urls():
    """Find product URLs from a Tokopedia store page.

    Interactively asks for a store URL, loads it in headless Chrome,
    extracts up to 5 product URLs, then probes the first product page
    for review-related markup. All results are printed to stdout.
    """
    url = input("Masukkan URL toko Tokopedia: ").strip()

    if not url:
        print("URL tidak boleh kosong!")
        return

    # Pre-declare so the finally block is safe even if Chrome fails to start
    # (previously `driver` could be unbound in `finally`, masked by a bare except).
    driver = None
    try:
        print("Membuka browser...")
        driver = webdriver.Chrome(options=_build_chrome_options())
        driver.get(url)

        # Crude wait for the JS-rendered listing to load.
        print("Menunggu halaman dimuat...")
        time.sleep(5)

        # Parse the rendered DOM with BeautifulSoup.
        soup = BeautifulSoup(driver.page_source, "html.parser")
        print(f"Page title: {soup.title.text if soup.title else 'No title'}")

        print("\nMencari tautan produk...")
        product_urls = _collect_product_urls(soup, limit=5)

        if product_urls:
            print(f"\nDitemukan {len(product_urls)} URL produk:")
            # NOTE: loop variable renamed from `url` to avoid shadowing the
            # store URL entered above.
            for i, product_url in enumerate(product_urls, 1):
                print(f"{i}. {product_url}")

            print("\nMenganalisis halaman produk pertama...")
            _inspect_first_product(driver, product_urls[0])
        else:
            print("Tidak ditemukan URL produk di halaman toko.")

    except Exception as e:
        print(f"Terjadi kesalahan: {e}")
        import traceback
        traceback.print_exc()
    finally:
        if driver is not None:
            try:
                driver.quit()
                print("Browser ditutup.")
            except Exception:
                pass  # best effort: the browser may already be gone

# Script entry point: run the interactive product-URL finder only when
# executed directly, not when imported as a module.
if __name__ == "__main__":
    find_product_urls()