You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							118 lines
						
					
					
						
							4.8 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							118 lines
						
					
					
						
							4.8 KiB
						
					
					
				
								# -*- coding: utf-8 -*-
							 | 
						|
								##############################################################################
							 | 
						|
								#
							 | 
						|
								#    Cybrosys Technologies Pvt. Ltd.
							 | 
						|
								#    Copyright (C) 2023-TODAY Cybrosys Technologies(<http://www.cybrosys.com>).
							 | 
						|
								#    Author: Rahul CK(<https://www.cybrosys.com>)
							 | 
						|
								#    you can modify it under the terms of the GNU LESSER
							 | 
						|
								#    GENERAL PUBLIC LICENSE (AGPL v3), Version 3.
							 | 
						|
								
							 | 
						|
								#    This program is distributed in the hope that it will be useful,
							 | 
						|
								#    but WITHOUT ANY WARRANTY; without even the implied warranty of
							 | 
						|
								#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
							 | 
						|
								#    GNU LESSER GENERAL PUBLIC LICENSE (AGPL v3) for more details.
							 | 
						|
								#
							 | 
						|
								#    You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE
							 | 
						|
								#    (AGPL v3) along with this program.
							 | 
						|
								#    If not, see <http://www.gnu.org/licenses/>.
							 | 
						|
								#
							 | 
						|
								##############################################################################
							 | 
						|
								import urllib.request
							 | 
						|
								import urllib
							 | 
						|
								import imghdr
							 | 
						|
								import posixpath
							 | 
						|
								import re
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								class Bing:
								    """Download images for a search query from Bing's image search.

								    Result pages are fetched from ``https://www.bing.com/images/async`` and
								    every image URL found in them is downloaded into ``output_dir`` as
								    ``Image_<n>.<ext>`` until ``limit`` images have been saved.

								    Attributes:
								        download_count: Number of images saved so far.
								        seen: Links that were downloaded successfully (plus the one
								            currently being attempted). Returned by ``run``.
								        failed: Mapping of link -> ``repr`` of the exception that made the
								            download fail. Failed links are never retried.
								        page_counter: Value sent as the ``first`` query parameter;
								            incremented after every result page.
								    """

								    # Shorthand accepted by ``get_filter`` -> value appended to ``qft=``.
								    _FILTERS = {
								        "line": "+filterui:photo-linedrawing",
								        "linedrawing": "+filterui:photo-linedrawing",
								        "photo": "+filterui:photo-photo",
								        "clipart": "+filterui:photo-clipart",
								        "gif": "+filterui:photo-animatedgif",
								        "animatedgif": "+filterui:photo-animatedgif",
								        "transparent": "+filterui:photo-transparent",
								    }

								    # URL suffixes kept as the saved file's extension (compared lowercased).
								    _KNOWN_EXTENSIONS = frozenset((
								        "jpe", "jpeg", "jfif", "exif", "tiff",
								        "gif", "bmp", "png", "webp", "jpg",
								    ))

								    # Magic numbers used only when the stdlib ``imghdr`` module is missing.
								    _IMAGE_SIGNATURES = (
								        b'\xff\xd8\xff',          # JPEG
								        b'\x89PNG\r\n\x1a\n',     # PNG
								        b'GIF87a', b'GIF89a',     # GIF
								        b'BM',                    # BMP
								        b'II*\x00', b'MM\x00*',   # TIFF (little / big endian)
								    )

								    # Matches ``murl":"<url>"`` and the HTML-escaped ``murl&quot;:&quot;...``
								    # form; the back-reference forces all three delimiters to be the same.
								    # NOTE(review): the escaped variant is accepted defensively - confirm
								    # which form the endpoint actually returns.
								    _MURL_PATTERN = re.compile(r'murl("|&quot;):\1(.*?)\1')

								    def __init__(self, query, limit, output_dir, adult, timeout, filter='',
								                 verbose=True):
								        """Store the search settings and reset the download state.

								        Args:
								            query: Search text; URL-quoted when the request is built.
								            limit: Maximum number of images to save (int).
								            output_dir: Directory object providing ``joinpath``
								                (presumably a ``pathlib.Path`` - verify against caller).
								            adult: String sent verbatim as the ``adlt`` query parameter.
								            timeout: Timeout in seconds for every HTTP request (int).
								            filter: Shorthand understood by ``get_filter``; ``None`` or
								                an unknown value sends an empty filter.
								            verbose: When true, ``download_image`` returns the link it
								                saved instead of ``None``.

								        Raises:
								            AssertionError: If ``limit`` or ``timeout`` is not an integer.
								        """
								        self._require_int(limit, "limit")
								        self._require_int(timeout, "timeout")

								        self.download_count = 0
								        self.query = query
								        self.output_dir = output_dir
								        self.adult = adult
								        self.filter = filter
								        self.verbose = verbose
								        self.seen = set()
								        self.failed = {}
								        self.limit = limit
								        self.timeout = timeout
								        self.page_counter = 0
								        self.headers = {
								            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
								                          'AppleWebKit/537.11 (KHTML, like Gecko) '
								                          'Chrome/23.0.1271.64 Safari/537.11',
								            'Accept': 'text/html,application/xhtml+xml,'
								                      'application/xml;q=0.9,*/*;q=0.8',
								            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
								            'Accept-Encoding': 'none',
								            'Accept-Language': 'en-US,en;q=0.8',
								            'Connection': 'keep-alive',
								        }

								    @staticmethod
								    def _require_int(value, name):
								        """Raise ``AssertionError`` unless ``value`` is a real integer."""
								        # Raised explicitly (not via ``assert``) so the check survives
								        # ``python -O`` while callers still see the same exception type.
								        if isinstance(value, bool) or not isinstance(value, int):
								            raise AssertionError("{} must be integer".format(name))

								    @classmethod
								    def _looks_like_image(cls, data):
								        """Return True when ``data`` (bytes) starts like a known image."""
								        try:
								            # ``imghdr`` was removed in Python 3.13 (PEP 594). The
								            # module-level ``import imghdr`` must also be guarded before
								            # this file can be imported on 3.13+.
								            import imghdr
								        except ImportError:
								            if data.startswith(cls._IMAGE_SIGNATURES):
								                return True
								            # WebP: RIFF container whose form type is "WEBP".
								            return data[:4] == b'RIFF' and data[8:12] == b'WEBP'
								        return bool(imghdr.what(None, data))

								    @classmethod
								    def _file_extension(cls, link):
								        """Return the extension to save ``link`` under ("jpg" if unknown)."""
								        path = urllib.parse.urlsplit(link).path
								        suffix = posixpath.basename(path).split(".")[-1]
								        return suffix if suffix.lower() in cls._KNOWN_EXTENSIONS else "jpg"

								    @classmethod
								    def _extract_links(cls, html):
								        """Return every ``murl`` image URL in ``html``, in page order."""
								        return [found.group(2) for found in cls._MURL_PATTERN.finditer(html)]

								    def get_filter(self, shorthand):
								        """Translate a filter shorthand into Bing's ``qft`` expression.

								        Returns an empty string for any shorthand that is not recognised.
								        """
								        return self._FILTERS.get(shorthand, "")

								    def save_image(self, link, file_path):
								        """Fetch ``link`` and write its bytes to ``file_path``.

								        Raises:
								            ValueError: If the payload is not recognised as an image.
								            Exception: Whatever ``urllib`` or ``open`` raise on failure.
								        """
								        request = urllib.request.Request(link, None, self.headers)
								        # The context manager closes the connection even if read() fails.
								        with urllib.request.urlopen(
								                request, timeout=self.timeout) as response:
								            image = response.read()
								        if not self._looks_like_image(image):
								            raise ValueError('Invalid image, not saving {}\n'.format(link))
								        with open(str(file_path), 'wb') as f:
								            f.write(image)

								    def download_image(self, link):
								        """Try to save ``link`` as the next numbered image (best effort).

								        On failure the counter is rolled back, the link is dropped from
								        ``seen`` and the reason is recorded in ``failed``; nothing is
								        raised.

								        Returns:
								            ``link`` when the image was saved and ``verbose`` is true,
								            otherwise ``None``.
								        """
								        self.download_count += 1
								        try:
								            file_name = "Image_{}.{}".format(
								                self.download_count, self._file_extension(link))
								            self.save_image(link, self.output_dir.joinpath(file_name))
								        except Exception as error:
								            self.download_count -= 1
								            # discard(): the link is absent when called outside run().
								            self.seen.discard(link)
								            self.failed[link] = repr(error)
								            return None
								        return link if self.verbose else None

								    def run(self):
								        """Fetch result pages until ``limit`` images have been saved.

								        Stops early when a page is empty or offers no link that has not
								        already been downloaded or failed.

								        Returns:
								            The set of links in ``seen``.
								        """
								        while self.download_count < self.limit:
								            qft = '' if self.filter is None else self.get_filter(self.filter)
								            request_url = ('https://www.bing.com/images/async?q='
								                           + urllib.parse.quote_plus(self.query)
								                           + '&first=' + str(self.page_counter)
								                           + '&count=' + str(self.limit)
								                           + '&adlt=' + self.adult
								                           + '&qft=' + qft)
								            request = urllib.request.Request(request_url, None,
								                                             headers=self.headers)
								            # Same timeout as image downloads, so a stalled search
								            # request cannot block forever.
								            with urllib.request.urlopen(
								                    request, timeout=self.timeout) as response:
								                html = response.read().decode('utf8')
								            if html == "":
								                break

								            attempted_new_link = False
								            for link in self._extract_links(html):
								                if self.download_count >= self.limit:
								                    break
								                if link in self.seen or link in self.failed:
								                    continue
								                attempted_new_link = True
								                self.seen.add(link)
								                self.download_image(link)

								            self.page_counter += 1
								            # Without this guard a page of already-known links would be
								            # requested again and again.
								            if not attempted_new_link:
								                break
								        return self.seen
							 | 
						|
								
							 |