shovel/main.py

18 lines
562 B
Python

import argparse
import re
from urllib.parse import urljoin

import httplib2
from bs4 import BeautifulSoup
def main():
    """Print the links found on a web page, one absolute URL per line.

    The page URL is taken from the ``site`` command-line argument. The page
    is fetched with httplib2 (using ``.cache`` in the current directory as
    its cache), parsed as HTML, and every element whose ``href`` attribute
    contains a dot is reported.
    """
    parser = argparse.ArgumentParser(description="Dig out links from a website.")
    parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
    args = parser.parse_args()

    http = httplib2.Http('.cache')
    # Only the body is needed; the response headers/status are not inspected.
    _response, content = http.request(args.site)

    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = BeautifulSoup(content, 'html.parser')

    # Find only file names: keep elements whose href contains a dot.
    file_link = re.compile(r'\..*$')
    for link in soup.find_all(href=file_link):
        # urljoin resolves relative hrefs against the page URL and leaves
        # already-absolute hrefs untouched; plain concatenation produced
        # broken URLs in both cases.
        print(urljoin(args.site, link['href']))


if __name__ == '__main__':
    main()