Update formatting
This commit is contained in:
		
							
								
								
									
										31
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								main.py
									
									
									
									
									
								
							| @ -6,12 +6,16 @@ from collections import Counter | |||||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     parser = argparse.ArgumentParser(description="Dig out links from a website.") |     parser = argparse.ArgumentParser( | ||||||
|     parser.add_argument('site', type=str, help="Website that you want to scrape for links.") |         description="Dig out links from a website.") | ||||||
|  |     parser.add_argument( | ||||||
|  |         'site', type=str, help="Website that you want to scrape for links.") | ||||||
|     parser.add_argument('-f', '--filter', type=str, default=r'..*$', |     parser.add_argument('-f', '--filter', type=str, default=r'..*$', | ||||||
|                         help="Only return file names matching this regular expression.") |                         help="Only return file names matching this regular expression.") | ||||||
|     parser.add_argument('-s', '--summary', action='store_true', help="Print summary and quit.") |     parser.add_argument('-s', '--summary', action='store_true', | ||||||
|     parser.add_argument('-d', '--debug', action='store_true', help="Print out debugging information.") |                         help="Print summary and quit.") | ||||||
|  |     parser.add_argument('-d', '--debug', action='store_true', | ||||||
|  |                         help="Print out debugging information.") | ||||||
|     parser.add_argument('-r', '--save-report', action='store_true', |     parser.add_argument('-r', '--save-report', action='store_true', | ||||||
|                         help='Save report of all the original links to a file.') |                         help='Save report of all the original links to a file.') | ||||||
|  |  | ||||||
| @ -40,7 +44,8 @@ if __name__ == '__main__': | |||||||
|                     has_jpeg = True |                     has_jpeg = True | ||||||
|                 extensions.append(extension) |                 extensions.append(extension) | ||||||
|             if args.debug: |             if args.debug: | ||||||
|                 print(f'Original: {link}, splitext[0]: {filename}, splitext[1]: {extension}') |                 print( | ||||||
|  |                     f'Original: {link}, splitext[0]: {filename}, splitext[1]: {extension}') | ||||||
|         # todo: merge jpeg and jpg into one extension (display as: xxx jpg (yyy as jpeg)) |         # todo: merge jpeg and jpg into one extension (display as: xxx jpg (yyy as jpeg)) | ||||||
|         print(f'URL: {args.site}') |         print(f'URL: {args.site}') | ||||||
|         print(f'Total file names: {len(links)}') |         print(f'Total file names: {len(links)}') | ||||||
| @ -49,14 +54,14 @@ if __name__ == '__main__': | |||||||
|         extension_count = Counter(extensions) |         extension_count = Counter(extensions) | ||||||
|         if has_jpeg: |         if has_jpeg: | ||||||
|  |  | ||||||
|         for extension, count in extension_count.items(): |             for extension, count in extension_count.items(): | ||||||
|             # do not print the first character (dot) of extension |                 # do not print the first character (dot) of extension | ||||||
|             if not has_jpeg: |                 if not has_jpeg: | ||||||
|                 print(f'{count}\t{extension[1:]}') |                     print(f'{count}\t{extension[1:]}') | ||||||
|             else: |                 else: | ||||||
|                 if extension == '.jpg' or extension == '.jpeg': |                     if extension == '.jpg' or extension == '.jpeg': | ||||||
|                     print(f"{count}\t{extension[1:]} (with some as .jpeg") |                         print(f"{count}\t{extension[1:]} (with some as .jpeg") | ||||||
|         exit(0) |             exit(0) | ||||||
|     # we got to the end, print out all the links |     # we got to the end, print out all the links | ||||||
|     link_list = [f'{args.site}{link["href"]}' for link in links] |     link_list = [f'{args.site}{link["href"]}' for link in links] | ||||||
|     for link in link_list: |     for link in link_list: | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user