# Python:
from playwright.sync_api import sync_playwright
import json
def extract_urls(base_url):
    """Return the unique absolute link URLs found on the page at *base_url*.

    Launches Chromium through Playwright, navigates to ``base_url`` and
    collects the ``href`` attribute of every ``<a>`` element whose ``href``
    does not start with ``javascript:``. Attributes that already begin with
    ``http://`` or ``https://`` are returned verbatim; every other value is
    resolved to an absolute URL against the document's base URL.

    Args:
        base_url: Address of the page to load; passed unchanged to
            ``page.goto``.

    Returns:
        A list of URL strings with duplicates removed, in the order the
        links first appear in the document.

    Raises:
        Any exception raised by Playwright while launching the browser,
        navigating, or evaluating the script propagates to the caller; the
        browser is closed first.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(base_url)
            urls = page.evaluate('''() => {
                const links = Array.from(
                    document.querySelectorAll('a[href]:not([href^="javascript:"])')
                );
                const resolved = [];
                for (const link of links) {
                    const href = link.getAttribute('href');
                    if (href.startsWith('http://') || href.startsWith('https://')) {
                        resolved.push(href);
                        continue;
                    }
                    try {
                        // document.baseURI honours a <base href> element,
                        // which window.location.href would ignore.
                        resolved.push(new URL(href, document.baseURI).href);
                    } catch (err) {
                        // An unparseable href is not a usable URL; skip it
                        // instead of failing the whole extraction.
                    }
                }
                return resolved;
            }''')
        finally:
            # Close the browser even when navigation or evaluation fails.
            browser.close()
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is deterministic (a set would return arbitrary order).
    return list(dict.fromkeys(urls))
if __name__ == "__main__":
    # Ask for the page to scan, then print its links as compact JSON
    # (no whitespace after separators).
    target = input("Enter URL: ")
    links = extract_urls(target)
    print(json.dumps(links, separators=(',', ':')))