diff --git a/main.py b/main.py
index 84715c6..a538683 100644
--- a/main.py
+++ b/main.py
@@ -6,12 +6,16 @@ from bs4 import BeautifulSoup
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description="Dig out links from a website.")
     parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
+    parser.add_argument('-f', '--filter', type=str, default=r'\..*$', help="Only return filenames matching this regular expression.")
     args = parser.parse_args()
     h = httplib2.Http('.cache')
     response, content = h.request(args.site)
     s = BeautifulSoup(content)
 
     """find only file names"""
-    links = s.find_all(href=re.compile('\..*$'))
+    links = s.find_all(href=re.compile(args.filter))
+    # find_all() returns an empty list when nothing matches; stop with a non-zero exit status.
+    if not links:
+        raise SystemExit("No filenames found with the given filter.")
     for link in links:
         print(args.site + link['href'])