From 2ab1331c922b1c94860421ac018a9c7d1bc428e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Kucharczyk?=
Date: Sun, 28 Jun 2020 21:20:52 +0200
Subject: [PATCH] Add support for specifying website.

---
 main.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index 58c36c7..84715c6 100644
--- a/main.py
+++ b/main.py
@@ -1,13 +1,17 @@
 import httplib2
 import re
+import argparse
 from bs4 import BeautifulSoup
 
 if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Dig out links from a website.")
+    parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
+    args = parser.parse_args()
+
     h = httplib2.Http('.cache')
-    site = 'https://realpornclip.com/wp-content/uploads/2020/06/'
-    response, content = h.request(site)
+    response, content = h.request(args.site)
     s = BeautifulSoup(content)
     """find only file names"""
     links = s.find_all(href=re.compile('\..*$'))
     for link in links:
-        print(site + link['href'])
+        print(args.site + link['href'])