
import time
import csv
import threading
import datetime
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
from seleniumbase import Driver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
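
# Scrapes used-car listings (title + link) from uae.yallamotor.com, per city and
# model year, using a pool of proxied undetected-Chrome drivers shared by a
# thread pool, and appends the results to a CSV file.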
whole = time.time()
class ChromePool:
    """Fixed-size pool of SeleniumBase Chrome drivers shared between worker threads."""

    def __init__(self, size=4):
        self.size = size
        self.pool = []
        self.lock = threading.Lock()
        self._create_pool()

    def _create_pool(self):
        print(f"[INIT] Creating {self.size} Chrome instances...")
        for i in range(self.size):
            PROXY_HOST = "us3.4g.iproyal.com"
            PROXY_PORT = 7008
            PROXY_USER = "6CX8Aoh"
            PROXY_PASS = "O2Ar6CP4cKznal2"
            pr_driver = f"{PROXY_USER}:{PROXY_PASS}@{PROXY_HOST}:{PROXY_PORT}"
            driver = Driver(
                uc=True,
                proxy=pr_driver,
                block_images=True,
                page_load_strategy="eager",
                window_size="1200,800",
                window_position="-10000,0"  # real (non-headless) window, parked off-screen
            )
            self.pool.append(driver)

    def get_driver(self):
        # Poll until a driver is free, releasing the lock between checks so
        # release_driver() can return an instance while we wait.
        while True:
            with self.lock:
                if self.pool:
                    return self.pool.pop()
            time.sleep(1)

    def release_driver(self, driver):
        with self.lock:
            self.pool.append(driver)

    def close_all(self):
        with self.lock:
            for d in self.pool:
                try:
                    d.quit()
                except Exception:
                    pass
            self.pool.clear()
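
# One shared browser pool; its size should match max_workers in the ThreadPoolExecutor below.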
chrome_pool = ChromePool(size=4)
def check_pages(driver):
    """Return 0 if the search has no results, -1 if more pages follow, 1 otherwise."""
    try:
        body_text = driver.find_element(By.TAG_NAME, "body").text
        if "Sorry!" in body_text or "We found no car matching your search criteria" in body_text:
            return 0
        try:
            next_anchor = driver.find_element(By.CSS_SELECTOR, 'a[aria-label="next page"]')
            try:
                # A disabled "next page" control means we are already on the last page.
                next_anchor.find_element(By.CSS_SELECTOR, "span.disabled")
                return 1
            except Exception:
                return -1
        except Exception:
            # No "next page" link at all: single page of results.
            return 1
    except Exception as e:
        print(f"[check_pages ERROR] {e}")
        return 1
def get_last(driver, retries=5, timeout=30):
    """Estimate the number of result pages from the "<N> used cars for sale found" text."""
    for attempt in range(retries):
        try:
            WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            body_text = driver.find_element(By.TAG_NAME, "body").text
            try:
                last_page = body_text.split("used cars for sale found")[0].split()[-1].strip()
                last_page = "".join(filter(str.isdigit, last_page))
                last_page = int(last_page) // 7 + 1  # assumes 7 listings per page
            except Exception:
                last_page = 1
            return last_page
        except Exception as e:
            print(f"[get_last ERROR] Attempt {attempt+1} → {e}")
            if attempt < retries - 1:
                driver.refresh()
                time.sleep(3)
            else:
                return 1
    return 1
def scrape_page(driver):
    """Parse the current results page and return a list of {"title", "link"} dicts."""
    cars = []
    soup = BeautifulSoup(driver.get_page_source(), "html.parser")
    divs = soup.find_all(class_="singleSearchCard m24t p12 bg-w border-gray border8")
    try:
        for li in divs:
            try:
                link = "https://uae.yallamotor.com" + li.find("h2", class_="font16 font-b m8t").find("a")["href"]
            except Exception:
                continue
            try:
                title = li.find(class_="font16 font-b m8t").text
            except Exception:
                title = ""
            cars.append({"title": title, "link": link})
    except Exception as e:
        print(f"[scrape_page ERROR] {e}")
    return cars
def scrape_entry(url, lock, output_file="ylla_tester.csv", max_retries=5):
    driver = chrome_pool.get_driver()
    results = []
    try:
        for attempt in range(max_retries):
            try:
                driver.get(url)
                time.sleep(2)
                decision = check_pages(driver)
                if decision == 0:
                    print("[SKIP] No cars found on", url)
                    return
                elif decision == 1:
                    print("[INFO] Single page:", url)
                    results.extend(scrape_page(driver))
                elif decision == -1:
                    last_page = get_last(driver)
                    print(f"[INFO] Multi-page ({last_page} pages):", url)
                    for page in range(1, last_page + 1):
                        page_url = f"{url}&page={page}" if "?" in url else f"{url}?page={page}"
                        driver.get(page_url)
                        time.sleep(2)
                        results.extend(scrape_page(driver))
                break  # success, stop retry loop
            except Exception as e:
                print(f"[RETRY] Attempt {attempt+1}/{max_retries} failed for {url} → {e}")
                if attempt < max_retries - 1:
                    time.sleep(3)
                    continue
                else:
                    print(f"[FAILED] Could not scrape {url} after {max_retries} attempts")
                    return
        if results:
            # Serialize CSV writes across threads; write the header only for a fresh file.
            with lock:
                with open(output_file, "a", newline="", encoding="utf-8") as f:
                    writer = csv.DictWriter(f, fieldnames=["title", "link"])
                    if f.tell() == 0:
                        writer.writeheader()
                    writer.writerows(results)
    finally:
        chrome_pool.release_driver(driver)
def generate_urls():
    FILTERED_LINKs = []
    cities = [
        'ct_al-ain', 'ct_ajman', 'ct_abu-dhabi', 'ct_dubai',
        'ct_fujairah', 'ct_ras-al-khaimah', 'ct_sharjah', 'ct_umm-al-quwain'
    ]
    # for ct in cities:
    #     FILTERED_LINKs.append(f'https://uae.yallamotor.com/used-cars/{ct}/yr_earlier_2006')
    url_format = 'https://uae.yallamotor.com/used-cars/{}/{}'
    for i in range(2022, 2023):
        for ct in cities:
            FILTERED_LINKs.append(url_format.format(ct, i))
    # for i in range(2007, datetime.date.today().year + 1):
    #     for ct in cities:
    #         FILTERED_LINKs.append(url_format.format(ct, i))
    FILTERED_LINKs_last = []
    for link in FILTERED_LINKs:
        FILTERED_LINKs_last.append(link.strip() + '/sl_individual/cn_used?sort=price_desc')
        # FILTERED_LINKs_last.append(link.strip() + '/sl_individual/cn_used?sort=price_asc')
        FILTERED_LINKs_last.append(link.strip() + '/sl_dealer/cn_used?sort=price_desc')
        # FILTERED_LINKs_last.append(link.strip() + '/sl_dealer/cn_used?sort=price_asc')
    return FILTERED_LINKs_last
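
# Entry point: fan the generated URLs out across worker threads; each worker
# borrows a browser from the shared pool and appends its results to the CSV.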
if __name__ == "__main__":
    urls = generate_urls()
    print(f"[INFO] Generated {len(urls)} URLs to scrape")
    lock = threading.Lock()
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(scrape_entry, url, lock) for url in urls]
        for future in as_completed(futures):
            try:
                print("Done:", future.result())
            except Exception as e:
                print("Error:", e)
    chrome_pool.close_all()
    print('{} mins'.format((time.time() - whole) // 60))