CybroAddons/product_image_suggestion/models/bing.py


								# -*- coding: utf-8 -*-

								##############################################################################

								#

								#    Cybrosys Technologies Pvt. Ltd.

								#    Copyright (C) 2023-TODAY Cybrosys Technologies(<http://www.cybrosys.com>).

								#    Author: Rahul CK(<https://www.cybrosys.com>)

								#    you can modify it under the terms of the GNU LESSER

								#    GENERAL PUBLIC LICENSE (AGPL v3), Version 3.


								#    This program is distributed in the hope that it will be useful,

								#    but WITHOUT ANY WARRANTY; without even the implied warranty of

								#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								#    GNU LESSER GENERAL PUBLIC LICENSE (AGPL v3) for more details.

								#

								#    You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE

								#    (AGPL v3) along with this program.

								#    If not, see <http://www.gnu.org/licenses/>.

								#

								##############################################################################

								import imghdr
								import logging
								import posixpath
								import re
								import urllib
								import urllib.parse
								import urllib.request


								class Bing:
								    """Scrape Bing image search results and save the images to disk.

								    ``run()`` requests result pages from ``bing.com/images/async``, extracts
								    the image links they contain and downloads them one by one until
								    ``limit`` images have been saved or no further links can be found.
								    """

								    # Extensions taken over from the URL as-is; anything else is saved
								    # with a ".jpg" suffix.
								    _IMAGE_EXTENSIONS = frozenset((
								        "jpe", "jpeg", "jfif", "exif", "tiff",
								        "gif", "bmp", "png", "webp", "jpg",
								    ))

								    # Image links appear in the result markup as HTML-escaped
								    # ``murl":"<link>"`` pairs.
								    _LINK_PATTERN = re.compile(r'murl&quot;:&quot;(.*?)&quot;')

								    def __init__(self, query, limit, output_dir, adult, timeout, filter='',
								                 verbose=True):
								        """Store the search parameters and the HTTP headers to send.

								        :param query: search text; URL-quoted when the request is built.
								        :param limit: number of images to download (must be exactly ``int``).
								        :param output_dir: target directory object; must provide
								            ``joinpath()`` (e.g. a ``pathlib.Path``).
								        :param adult: string appended verbatim to the ``adlt=`` URL
								            parameter.
								        :param timeout: network timeout in seconds (must be exactly ``int``).
								        :param filter: shorthand understood by :meth:`get_filter`; ``None``
								            or an unknown value applies no filter.
								        :param verbose: when true, :meth:`download_image` returns the link
								            it saved.
								        :raises AssertionError: if ``limit`` or ``timeout`` is not an int.
								        """
								        # Kept as asserts so existing callers still get AssertionError.
								        # NOTE: these checks are skipped when Python runs with -O.
								        assert type(limit) is int, "limit must be integer"
								        assert type(timeout) is int, "timeout must be integer"

								        self.query = query
								        self.limit = limit
								        self.output_dir = output_dir
								        self.adult = adult
								        self.timeout = timeout
								        self.filter = filter
								        self.verbose = verbose

								        self.download_count = 0
								        self.page_counter = 0
								        # Links handed to download_image() that have not failed.
								        self.seen = set()

								        self.headers = {
								            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
								                          'AppleWebKit/537.11 (KHTML, like Gecko) '
								                          'Chrome/23.0.1271.64 Safari/537.11',
								            'Accept': 'text/html,application/xhtml+xml,'
								                      'application/xml;q=0.9,*/*;q=0.8',
								            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
								            'Accept-Encoding': 'none',
								            'Accept-Language': 'en-US,en;q=0.8',
								            'Connection': 'keep-alive',
								        }

								    def get_filter(self, shorthand):
								        """Translate a filter shorthand into the value of the ``qft=`` parameter.

								        :param shorthand: one of ``line``/``linedrawing``, ``photo``,
								            ``clipart``, ``gif``/``animatedgif`` or ``transparent``.
								        :return: the matching ``+filterui:...`` string, or ``""`` for any
								            other value.
								        """
								        if shorthand in ("line", "linedrawing"):
								            return "+filterui:photo-linedrawing"
								        if shorthand == "photo":
								            return "+filterui:photo-photo"
								        if shorthand == "clipart":
								            return "+filterui:photo-clipart"
								        if shorthand in ("gif", "animatedgif"):
								            return "+filterui:photo-animatedgif"
								        if shorthand == "transparent":
								            return "+filterui:photo-transparent"
								        return ""

								    def save_image(self, link, file_path):
								        """Download ``link`` and write its bytes to ``file_path``.

								        :param link: URL of the image.
								        :param file_path: destination; converted with ``str()`` before
								            opening.
								        :raises ValueError: if the downloaded bytes are not recognised as
								            an image by ``imghdr``.
								        """
								        request = urllib.request.Request(link, None, self.headers)
								        # The context manager closes the connection even if read() fails.
								        with urllib.request.urlopen(request,
								                                    timeout=self.timeout) as response:
								            image = response.read()
								        if not imghdr.what(None, image):
								            raise ValueError('Invalid image, not saving {}\n'.format(link))
								        with open(str(file_path), 'wb') as f:
								            f.write(image)

								    def download_image(self, link):
								        """Save ``link`` as ``Image_<n>.<ext>`` inside ``output_dir``.

								        Failures are deliberately non-fatal: the download counter is rolled
								        back, the link is dropped from ``seen`` and the error is logged at
								        debug level.

								        :param link: URL of the image.
								        :return: ``link`` when the image was saved and ``verbose`` is true,
								            otherwise ``None``.
								        """
								        # Reserve the next file number; released again on failure.
								        self.download_count += 1
								        try:
								            path = urllib.parse.urlsplit(link).path
								            filename = posixpath.basename(path).split('?')[0]
								            file_type = filename.rsplit(".", 1)[-1]
								            if file_type.lower() not in self._IMAGE_EXTENSIONS:
								                file_type = "jpg"

								            self.save_image(link, self.output_dir.joinpath(
								                "Image_{}.{}".format(str(self.download_count), file_type)))
								        except Exception as exc:
								            self.download_count -= 1
								            # discard(): the link may not be registered when this method
								            # is called directly, and remove() would then raise KeyError
								            # from inside the handler.
								            self.seen.discard(link)
								            logging.getLogger(__name__).debug(
								                "Could not download image %s: %s", link, exc)
								            return None
								        return link if self.verbose else None

								    def run(self):
								        """Download images until ``limit`` is reached or results run out.

								        Each link is attempted at most once per call. The loop stops when a
								        page comes back empty or contains no link that has not been tried
								        yet, so it cannot spin forever on exhausted or failing results.

								        :return: ``self.seen``, the set of links whose download did not
								            fail.
								        """
								        query_filter = ('' if self.filter is None
								                        else self.get_filter(self.filter))
								        # Links tried during this call, including failed ones (which
								        # download_image() removes from self.seen).
								        attempted = set()

								        while self.download_count < self.limit:
								            request_url = (
								                'https://www.bing.com/images/async?q='
								                + urllib.parse.quote_plus(self.query)
								                + '&first=' + str(self.page_counter)
								                + '&count=' + str(self.limit)
								                + '&adlt=' + self.adult
								                + '&qft=' + query_filter
								            )
								            request = urllib.request.Request(request_url, None,
								                                             headers=self.headers)
								            with urllib.request.urlopen(request,
								                                        timeout=self.timeout) as response:
								                html = response.read().decode('utf8')
								            if html == "":
								                break

								            found_new_link = False
								            for link in self._LINK_PATTERN.findall(html):
								                if self.download_count >= self.limit:
								                    break
								                if link in self.seen or link in attempted:
								                    continue
								                found_new_link = True
								                attempted.add(link)
								                self.seen.add(link)
								                self.download_image(link)

								            if not found_new_link:
								                # Nothing left to try on this page; requesting more pages
								                # would only repeat the same links.
								                break
								            # NOTE(review): `first` looks like a result offset, so
								            # stepping by one probably makes consecutive pages overlap -
								            # confirm before changing the step.
								            self.page_counter += 1
								        return self.seen