o
    Gh                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ ej	ej
dd eeZdededB fdd	Zed
krdee jdkr^e jd ZeeZer\ed eejeddd ed dS dS ed dS dS )    N)sync_playwrightTimeoutError)
HTMLParserz)%(asctime)s - %(levelname)s - %(message)s)levelformaturlreturnc              
   C   s  t d|   zt }|jjddgd}| }t d |j| ddd t d	 z	|jd
dd W n tyE   t 	d Y nw t d z	|jddd W n tyb   t 	d Y nw |
d |d}t|}i }|d}|s|d}|r|jddnd|d< t d|d   g d}d}	|D ]}
||
}	|	r nq|	r|	jddnd|d< t d|d   g d}d}|D ]}
||
}|r nq|r|jdp|jd }||d!< n|d"}|r|jd#|d!< nd$|d!< t d%|d!   | |d&< |  t d' |W  d   W S 1 sw   Y  W dS  ty:   t d(|  d) Y dS  tyU } zt d*|  W Y d}~dS d}~ww )+zH
    Fungsi utama untuk scrape data produk dari URL yang diberikan.
    z#Memulai proses scraping untuk URL: Tz--no-sandbox)headlessargszNavigasi ke halaman...domcontentloadedi`  )
wait_untiltimeoutz!Menunggu selector judul produk...z6h1[data-pl="product-title"], .title--line-one--nU9Qttoi0u  )r   zCSelector judul tidak ditemukan, melanjutkan dengan yang tersedia...z!Menunggu selector gambar utama...z]img[data-image-gallery-featured-image], [data-spm="image_magnifier"] img, img.magnifier-imagezJSelector gambar utama tidak ditemukan, melanjutkan dengan yang tersedia...i  bodyzh1[data-pl="product-title"]z.title--line-one--nU9Qtto)stripzJudul tidak ditemukantitlezJudul ditemukan: )z!.pdp-product-price .price.currentz.product-price-currentz.ae-product-pricez*[class*="Price"]z*[class*="price"]z.product-price .valuez.current-price spanz.sale-valueNzHarga tidak ditemukanpricezHarga ditemukan: )z&img[data-image-gallery-featured-image]z [data-spm="image_magnifier"] imgzimg.magnifier-imagez(*[class*="gallery-preview-panel__image"]zimg#img-blankz!.image-thumb-item:first-child imgsrczdata-srcmain_image_urlzmeta[property="og:image"]contentzGambar utama tidak ditemukanzURL Gambar Utama ditemukan: 
source_urlzProses scraping berhasil.zTimeout saat scraping z?. Halaman mungkin terlalu lambat atau selector tidak ditemukan.zTerjadi kesalahan tak terduga: )loggerinfor   chromiumlaunchnew_pagegotowait_for_selectorPlaywrightTimeoutErrorwarningwait_for_timeout
inner_htmlr   	css_firsttext
attributesgetcloseerror	Exception)r   pbrowserpagehtmltreedata
title_nodeprice_selectors
price_nodeselectorimage_selectorsmain_image_nodeimg_srcog_image_nodee r7   &/var/www/html/alibaba/final_scraper.pyscrape_product_data   s   








	



*_r9   __main__   z--- HASIL SCRAPING ---   F)indentensure_asciiz----------------------zQError: Tolong berikan URL sebagai argumen. Contoh: python3 final_scraper.py <URL>)sysjsonloggingplaywright.sync_apir   r   r   selectolax.parserr   basicConfigINFO	getLogger__name__r   strdictr9   lenargvurl_to_scrapescraped_dataprintdumpsr7   r7   r7   r8   <module>   s&    
l
