Add support for specifying the website as a command-line argument.

2020-06-28 21:20:52 +02:00
parent 08e4cb4630
commit 2ab1331c92
1 changed file with 7 additions and 3 deletions
@@ -1,13 +1,17 @@
 import httplib2
 import re
+import argparse
 from bs4 import BeautifulSoup

 if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="Dig out links from a website.")
+    parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
+    args = parser.parse_args()
+    
    h = httplib2.Http('.cache')
-    site = 'https://realpornclip.com/wp-content/uploads/2020/06/'
-    response, content = h.request(site)
+    response, content = h.request(args.site)
    s = BeautifulSoup(content)
    """find only file names"""
    links = s.find_all(href=re.compile('\..*$'))
    for link in links:
-        print(site + link['href'])
+        print(args.site + link['href'])