You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							120 lines
						
					
					
						
							4.8 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							120 lines
						
					
					
						
							4.8 KiB
						
					
					
				| # -*- coding: utf-8 -*- | |
| ############################################################################### | |
| # | |
| #    Cybrosys Technologies Pvt. Ltd. | |
| # | |
| #    Copyright (C) 2023-TODAY Cybrosys Technologies(<https://www.cybrosys.com>) | |
| #    Author: RAHUL CK (odoo@cybrosys.com) | |
| # | |
| #    You can modify it under the terms of the GNU AFFERO | |
| #    GENERAL PUBLIC LICENSE (AGPL v3), Version 3. | |
| # | |
| #    This program is distributed in the hope that it will be useful, | |
| #    but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | |
| #    GNU AFFERO GENERAL PUBLIC LICENSE (AGPL v3) for more details. | |
| # | |
| #    You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE | |
| #    (AGPL v3) along with this program. | |
| #    If not, see <http://www.gnu.org/licenses/>. | |
| # | |
| ############################################################################### | |
| import imghdr | |
| import posixpath | |
| import re | |
| import urllib | |
| import urllib.request | |
| 
 | |
| 
 | |
| class Bing: | |
|     def __init__(self, query, limit, output_dir, adult, timeout, filter='', | |
|                  verbose=True): | |
|         self.download_count = 0 | |
|         self.query = query | |
|         self.output_dir = output_dir | |
|         self.adult = adult | |
|         self.filter = filter | |
|         self.verbose = verbose | |
|         self.seen = set() | |
| 
 | |
|         assert type(limit) == int, "limit must be integer" | |
|         self.limit = limit | |
|         assert type(timeout) == int, "timeout must be integer" | |
|         self.timeout = timeout | |
| 
 | |
|         self.page_counter = 0 | |
|         self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) ' | |
|                                       'AppleWebKit/537.11 (KHTML, like Gecko) ' | |
|                                       'Chrome/23.0.1271.64 Safari/537.11', | |
|                         'Accept': 'text/html,application/xhtml+xml,' | |
|                                   'application/xml;q=0.9,*/*;q=0.8', | |
|                         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', | |
|                         'Accept-Encoding': 'none', | |
|                         'Accept-Language': 'en-US,en;q=0.8', | |
|                         'Connection': 'keep-alive'} | |
| 
 | |
|     def get_filter(self, shorthand): | |
|         if shorthand == "line" or shorthand == "linedrawing": | |
|             return "+filterui:photo-linedrawing" | |
|         elif shorthand == "photo": | |
|             return "+filterui:photo-photo" | |
|         elif shorthand == "clipart": | |
|             return "+filterui:photo-clipart" | |
|         elif shorthand == "gif" or shorthand == "animatedgif": | |
|             return "+filterui:photo-animatedgif" | |
|         elif shorthand == "transparent": | |
|             return "+filterui:photo-transparent" | |
|         else: | |
|             return "" | |
| 
 | |
|     def save_image(self, link, file_path): | |
|         request = urllib.request.Request(link, None, self.headers) | |
|         image = urllib.request.urlopen(request, timeout=self.timeout).read() | |
|         if not imghdr.what(None, image): | |
|             raise ValueError('Invalid image, not saving {}\n'.format(link)) | |
|         with open(str(file_path), 'wb') as f: | |
|             f.write(image) | |
| 
 | |
|     def download_image(self, link): | |
|         self.download_count += 1 | |
|         try: | |
|             path = urllib.parse.urlsplit(link).path | |
|             filename = posixpath.basename(path).split('?')[0] | |
|             file_type = filename.split(".")[-1] | |
|             if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", | |
|                                          "gif", "bmp", "png", "webp", "jpg"]: | |
|                 file_type = "jpg" | |
| 
 | |
|             self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format( | |
|                 str(self.download_count), file_type))) | |
|             if self.verbose: | |
|                 return link | |
| 
 | |
|         except Exception as e: | |
|             self.download_count -= 1 | |
|             self.seen.remove(link) | |
| 
 | |
|     def run(self): | |
|         while self.download_count < self.limit: | |
|             request_url = 'https://www.bing.com/images/async?q=' \ | |
|                           + urllib.parse.quote_plus(self.query) \ | |
|                           + '&first=' + str(self.page_counter) + '&count=' \ | |
|                           + str(self.limit) \ | |
|                           + '&adlt=' + self.adult + '&qft=' + ( | |
|                               '' if self.filter is None else self.get_filter( | |
|                                   self.filter)) | |
|             request = urllib.request.Request(request_url, None, | |
|                                              headers=self.headers) | |
|             response = urllib.request.urlopen(request) | |
|             html = response.read().decode('utf8') | |
|             if html == "": | |
|                 break | |
|             links = re.findall('murl":"(.*?)"', html) | |
| 
 | |
|             for link in links: | |
|                 if self.download_count < self.limit and link not in self.seen: | |
|                     self.seen.add(link) | |
|                     self.download_image(link) | |
| 
 | |
|             self.page_counter += 1 | |
|         return self.seen
 | |
| 
 |