Replace the hard-coded site URL in main.py with a required positional
command-line argument `site`, parsed with argparse.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The 4-space indentation and the position of
the single blank context line (placed after the imports) are inferred from
the hunk line counts (13 old / 17 new); verify against main.py at 58c36c7.

diff --git a/main.py b/main.py
index 58c36c7..84715c6 100644
--- a/main.py
+++ b/main.py
@@ -1,13 +1,17 @@
 import httplib2
 import re
+import argparse
 from bs4 import BeautifulSoup
 
 if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Dig out links from a website.")
+    parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
+    args = parser.parse_args()
+
     h = httplib2.Http('.cache')
-    site = 'https://realpornclip.com/wp-content/uploads/2020/06/'
-    response, content = h.request(site)
+    response, content = h.request(args.site)
     s = BeautifulSoup(content)
     """find only file names"""
     links = s.find_all(href=re.compile('\..*$'))
     for link in links:
-        print(site + link['href'])
+        print(args.site + link['href'])