Add the filter argument.
This commit is contained in:
parent
2ab1331c92
commit
9864ad2617
5
main.py
5
main.py
|
@ -6,12 +6,15 @@ from bs4 import BeautifulSoup
|
|||
if __name__ == '__main__':
    # Script entry point: fetch a page and print every link whose href
    # matches the --filter regular expression, one URL per line.

    # Imported here so this entry-point block stays self-contained.
    from urllib.parse import urljoin

    parser = argparse.ArgumentParser(description="Dig out links from a website.")
    parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
    # Raw string: '\.' inside a plain literal is an invalid escape sequence.
    parser.add_argument('-f', '--filter', type=str, default=r'\..*$', help="Only return filenames matching this regular expression.")
    args = parser.parse_args()

    # Compile up front so a malformed pattern is reported as a usage error
    # (exit status 2) instead of an uncaught traceback.
    try:
        href_pattern = re.compile(args.filter)
    except re.error as err:
        parser.error("invalid --filter regular expression: %s" % err)

    # '.cache' is handed to httplib2 as its cache location.
    h = httplib2.Http('.cache')
    _, content = h.request(args.site)
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and bs4 does not warn about guessing).
    s = BeautifulSoup(content, 'html.parser')

    # Keep only tags whose href attribute matches the filter.
    links = s.find_all(href=href_pattern)
    if not links:
        # SystemExit with a message prints it to stderr and exits with status 1.
        raise SystemExit("No filenames found with the given filter.")
    for link in links:
        # urljoin resolves relative, root-relative and already-absolute hrefs
        # against the site URL; plain concatenation mangles the latter two.
        print(urljoin(args.site, link['href']))
|
||||
|
|
Loading…
Reference in New Issue