Add support for specifying the website as a command-line argument.
This commit is contained in:
parent
08e4cb4630
commit
2ab1331c92
10
main.py
10
main.py
|
@ -1,13 +1,17 @@
|
||||||
import httplib2
|
import httplib2
|
||||||
import re
|
import re
|
||||||
|
import argparse
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
parser = argparse.ArgumentParser(description="Dig out links from a website.")
|
||||||
|
parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
h = httplib2.Http('.cache')
|
h = httplib2.Http('.cache')
|
||||||
site = 'https://realpornclip.com/wp-content/uploads/2020/06/'
|
response, content = h.request(args.site)
|
||||||
response, content = h.request(site)
|
|
||||||
s = BeautifulSoup(content)
|
s = BeautifulSoup(content)
|
||||||
"""find only file names"""
|
"""find only file names"""
|
||||||
links = s.find_all(href=re.compile('\..*$'))
|
links = s.find_all(href=re.compile('\..*$'))
|
||||||
for link in links:
|
for link in links:
|
||||||
print(site + link['href'])
|
print(args.site + link['href'])
|
||||||
|
|
Loading…
Reference in New Issue