You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							122 lines
						
					
					
						
							5.0 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							122 lines
						
					
					
						
							5.0 KiB
						
					
					
				| # -*- coding: utf-8 -*- | |
| ############################################################################## | |
| # | |
| #    Cybrosys Technologies Pvt. Ltd. | |
| #    Copyright (C) 2025-TODAY Cybrosys Technologies(<http://www.cybrosys.com>). | |
| #    Author: Vishnu kp(<https://www.cybrosys.com>) | |
| #    you can modify it under the terms of the GNU AFFERO GENERAL | |
| #    PUBLIC LICENSE (AGPL v3), Version 3. | |
| 
 | |
| #    This program is distributed in the hope that it will be useful, | |
| #    but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | |
| #    GNU AFFERO GENERAL PUBLIC LICENSE (AGPL v3) for more details. | |
| # | |
| #    You should have received a copy of the GNU AFFERO GENERAL PUBLIC | |
| #    LICENSE (AGPL v3) along with this program. | |
| #    If not, see <http://www.gnu.org/licenses/>. | |
| # | |
| ############################################################################## | |
| import imghdr | |
| import posixpath | |
| import re | |
| import urllib.request | |
| import urllib | |
| 
 | |
| 
 | |
class Bing:
    """Download images returned by a Bing image search.

    The instance keeps a running count of saved images and the set of
    links it has already handled, so a single ``run()`` call fetches
    result pages until ``limit`` images have been written to
    ``output_dir``.
    """

    # Extensions that are kept when naming a saved file; any other
    # extension found in the URL is replaced by "jpg".
    _KNOWN_EXTENSIONS = ("jpe", "jpeg", "jfif", "exif", "tiff",
                         "gif", "bmp", "png", "webp", "jpg")

    # Leading bytes used to recognise an image when ``imghdr`` is missing.
    _IMAGE_SIGNATURES = (b'\xff\xd8\xff',        # JPEG
                         b'\x89PNG\r\n\x1a\n',   # PNG
                         b'GIF87a', b'GIF89a',   # GIF
                         b'BM',                  # BMP
                         b'II*\x00', b'MM\x00*')  # TIFF

    def __init__(self, query, limit, output_dir, adult, timeout, filter='',
                 verbose=True):
        """Store the search settings and the HTTP headers sent to Bing.

        :param query: search text; URL-quoted when the request is built.
        :param limit: number of images to download (must be an ``int``).
        :param output_dir: object providing ``joinpath()`` (for example a
            ``pathlib.Path``) that names the destination directory.
        :param adult: value appended to the ``adlt`` query parameter;
            it is concatenated as-is, so it has to be a string.
        :param timeout: network timeout in seconds (must be an ``int``).
        :param filter: shorthand understood by :meth:`get_filter`.
        :param verbose: when true, :meth:`download_image` returns the link
            it saved instead of ``None``.
        :raises AssertionError: if ``limit`` or ``timeout`` is not an int.
        """
        self.download_count = 0
        self.query = query
        self.output_dir = output_dir
        self.adult = adult
        self.filter = filter
        self.verbose = verbose
        self.seen = set()
        # Kept as assertions so existing callers that catch AssertionError
        # continue to work.
        assert isinstance(limit, int), "limit must be integer"
        self.limit = limit
        assert isinstance(timeout, int), "timeout must be integer"
        self.timeout = timeout

        self.page_counter = 0
        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
                                      'AppleWebKit/537.11 (KHTML, like Gecko) '
                                      'Chrome/23.0.1271.64 Safari/537.11',
                        'Accept': 'text/html,application/xhtml+xml,'
                                  'application/xml;q=0.9,*/*;q=0.8',
                        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
                        'Accept-Encoding': 'none',
                        'Accept-Language': 'en-US,en;q=0.8',
                        'Connection': 'keep-alive'}

    def get_filter(self, shorthand):
        """Translate a filter shorthand into Bing's ``qft`` fragment.

        :param shorthand: one of ``line``/``linedrawing``, ``photo``,
            ``clipart``, ``gif``/``animatedgif`` or ``transparent``.
        :return: the matching ``+filterui:...`` string, or ``""`` for any
            other value.
        """
        if shorthand in ("line", "linedrawing"):
            return "+filterui:photo-linedrawing"
        if shorthand == "photo":
            return "+filterui:photo-photo"
        if shorthand == "clipart":
            return "+filterui:photo-clipart"
        if shorthand in ("gif", "animatedgif"):
            return "+filterui:photo-animatedgif"
        if shorthand == "transparent":
            return "+filterui:photo-transparent"
        return ""

    @classmethod
    def _is_image(cls, data):
        """Return True when the bytes in *data* look like an image file."""
        try:
            # imghdr is deprecated and absent from Python 3.13 onwards, so
            # it is imported here rather than relied on at module level.
            import imghdr
        except ImportError:
            if data.startswith(cls._IMAGE_SIGNATURES):
                return True
            # WebP: RIFF container whose form type is "WEBP".
            return data[:4] == b'RIFF' and data[8:12] == b'WEBP'
        return bool(imghdr.what(None, data))

    def save_image(self, link, file_path):
        """Fetch *link* and write its content to *file_path*.

        :param link: URL of the image.
        :param file_path: destination; converted with ``str()`` before
            being opened for binary writing.
        :raises ValueError: if the downloaded bytes are not recognised as
            an image (nothing is written in that case).
        """
        request = urllib.request.Request(link, None, self.headers)
        # The context manager closes the HTTP response once it is read.
        with urllib.request.urlopen(request,
                                    timeout=self.timeout) as response:
            image = response.read()
        if not self._is_image(image):
            raise ValueError('Invalid image, not saving {}\n'.format(link))
        with open(str(file_path), 'wb') as image_file:
            image_file.write(image)

    def download_image(self, link):
        """Download one image and store it as ``Image_<n>.<ext>``.

        ``download_count`` is incremented first so it can be used as the
        file number; it is rolled back if anything goes wrong.

        :param link: URL of the image.
        :return: *link* when the image was saved and ``verbose`` is true,
            otherwise ``None``.
        """
        self.download_count += 1
        try:
            path = urllib.parse.urlsplit(link).path
            filename = posixpath.basename(path).split('?')[0]
            file_type = filename.split(".")[-1]
            if file_type.lower() not in self._KNOWN_EXTENSIONS:
                file_type = "jpg"
            target = self.output_dir.joinpath(
                "Image_{}.{}".format(self.download_count, file_type))
            self.save_image(link, target)
        except Exception:
            # Best effort: a link that cannot be fetched or saved is
            # skipped. discard() is used instead of remove() so a link that
            # was never registered in ``seen`` cannot raise KeyError here.
            self.download_count -= 1
            self.seen.discard(link)
            return None
        return link if self.verbose else None

    def _build_request_url(self):
        """Return the Bing async search URL for the current page counter."""
        qft = '' if self.filter is None else self.get_filter(self.filter)
        return ('https://www.bing.com/images/async?q='
                + urllib.parse.quote_plus(self.query)
                + '&first=' + str(self.page_counter)
                + '&count=' + str(self.limit)
                + '&adlt=' + self.adult
                + '&qft=' + qft)

    def run(self):
        """Fetch result pages and download images until ``limit`` is hit.

        The loop also stops when a page comes back empty or contains no
        image links, because requesting further pages could otherwise
        repeat forever.

        :return: the ``seen`` set; links whose download failed have been
            removed from it by :meth:`download_image`.
        """
        while self.download_count < self.limit:
            request = urllib.request.Request(self._build_request_url(), None,
                                             headers=self.headers)
            # Use the configured timeout so a stalled search request cannot
            # hang the caller, and close the response after reading it.
            with urllib.request.urlopen(request,
                                        timeout=self.timeout) as response:
                html = response.read().decode('utf8')
            if html == "":
                break
            links = re.findall('murl":"(.*?)"', html)
            if not links:
                # Nothing to download on this page; stop instead of
                # requesting pages indefinitely.
                break

            for link in links:
                if self.download_count < self.limit and link not in self.seen:
                    self.seen.add(link)
                    self.download_image(link)
            # NOTE(review): ``first`` looks like a result offset but only
            # advances by one per request - confirm this is intended.
            self.page_counter += 1
        return self.seen
 | |
| 
 |