Add support for specifying website.

This commit is contained in:
Lukáš Kucharczyk 2020-06-28 21:20:52 +02:00
parent 08e4cb4630
commit 2ab1331c92
1 changed file with 7 additions and 3 deletions

10
main.py
View File

@@ -1,13 +1,17 @@
import httplib2 import httplib2
import re import re
import argparse
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Dig out links from a website.")
parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
args = parser.parse_args()
h = httplib2.Http('.cache') h = httplib2.Http('.cache')
site = 'https://realpornclip.com/wp-content/uploads/2020/06/' response, content = h.request(args.site)
response, content = h.request(site)
s = BeautifulSoup(content) s = BeautifulSoup(content)
"""find only file names""" """find only file names"""
links = s.find_all(href=re.compile('\..*$')) links = s.find_all(href=re.compile('\..*$'))
for link in links: for link in links:
print(site + link['href']) print(args.site + link['href'])