Add the filter argument.
This commit is contained in:
		
							
								
								
									
										5
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								main.py
									
									
									
									
									
								
							| @ -6,12 +6,15 @@ from bs4 import BeautifulSoup | ||||
if __name__ == '__main__':
    # Script entry point: fetch the page given on the command line and print
    # the target of every element whose href matches the --filter regex.

    # Imported here because only this entry point needs them and the
    # module's top-level import block is outside this hunk.
    import sys
    from urllib.parse import urljoin

    parser = argparse.ArgumentParser(description="Dig out links from a website.")
    parser.add_argument('site', type=str, help="Website that you want to scrape for links.")
    # Raw string: '\.' is not a valid string escape, so the non-raw form
    # triggers an invalid-escape warning on current Python versions.
    parser.add_argument('-f', '--filter', type=str, default=r'\..*$',
                        help="Only return filenames matching this regular expression.")
    args = parser.parse_args()

    # Compile the user-supplied pattern up front so a malformed expression is
    # reported as a usage error instead of an uncaught re.error traceback.
    try:
        href_pattern = re.compile(args.filter)
    except re.error as err:
        parser.error("invalid --filter regular expression: %s" % err)

    h = httplib2.Http('.cache')  # '.cache' is the on-disk HTTP cache directory
    _, content = h.request(args.site)
    s = BeautifulSoup(content)

    # Keep only elements whose href matches the filter (by default: anything
    # containing a dot, i.e. something that looks like a file name).
    links = s.find_all(href=href_pattern)
    if not links:
        sys.exit("No filenames found with the given filter.")
    for link in links:
        # urljoin resolves relative, root-relative and absolute hrefs against
        # the site URL; plain concatenation produced broken URLs for anything
        # other than a bare relative name appended to a '/'-terminated site.
        print(urljoin(args.site, link['href']))
|  | ||||
		Reference in New Issue
	
	Block a user
	 Lukáš Kucharczyk
					Lukáš Kucharczyk