You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							508 lines
						
					
					
						
							24 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							508 lines
						
					
					
						
							24 KiB
						
					
					
				| # -*- coding: utf-8 -*- | |
| ############################################################################### | |
| # | |
| #    Cybrosys Technologies Pvt. Ltd. | |
| # | |
| #    Copyright (C) 2024-TODAY Cybrosys Technologies(<https://www.cybrosys.com>) | |
| #    Author: Sruthi Renjith (odoo@cybrosys.com) | |
| # | |
| #    You can modify it under the terms of the GNU LESSER | |
| #    GENERAL PUBLIC LICENSE (LGPL v3), Version 3. | |
| # | |
| #    This program is distributed in the hope that it will be useful, | |
| #    but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | |
| #    GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. | |
| # | |
| #    You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE | |
| #    (LGPL v3) along with this program. | |
| #    If not, see <http://www.gnu.org/licenses/>. | |
| # | |
| ############################################################################### | |
| import io | |
| import os | |
| from pdf2image import convert_from_bytes | |
| from PIL import Image, ImageOps | |
| import pytesseract | |
| import re | |
| import spacy | |
| from odoo import api, fields, models, _ | |
| from odoo.exceptions import ValidationError | |
| 
 | |
| 
 | |
class OCRDataTemplate(models.TransientModel):
    """Transient record holding an uploaded JPG, JPEG, PNG or PDF document,
    the text read from it and the model/fields that text is mapped to."""
    _name = "ocr.data.template"
    _description = "Data retrieving template"
    _rec_name = "file_name"

    # Uploaded document; kept as an attachment and mandatory.
    image = fields.Binary(
        string="Document",
        attachment=True,
        required=True,
        help="Upload .jpg, .jpeg, .png or .pdf files",
    )
    # Name of the uploaded file; its extension drives how the file is read.
    file_name = fields.Char(
        string="Document Name",
        help="Document name",
    )
    # Copy of ``image`` filled in by the onchange on ``image``.
    image2 = fields.Image(
        string="Document",
        help="Uploaded document",
    )
    # Switched on by ``action_get_data``.
    flag = fields.Boolean(
        string="Is Read",
        default=False,
        help="Flag to check the document read or not",
    )
    # Text produced by ``action_get_data``.
    data = fields.Text(
        string="Data",
        readonly=True,
        help="Content from the document",
    )
    # Target model, limited to the models handled by ``action_process_data``.
    model_name_id = fields.Many2one(
        'ir.model',
        string="Model",
        domain="[('model', 'in', ["
               "'res.partner', 'account.move', 'hr.employee', "
               "'hr.expense', 'sale.order', 'purchase.order'])]",
        help="Model to which the data want to map",
    )
    # Fields of the target model that should receive extracted values.
    model_field_ids = fields.Many2many(
        'ir.model.fields',
        string="Fields",
        domain="[('model_id', '=', model_name_id)]",
        help="Fields names to map data",
    )
| 
 | |
|     def data_segmentation(self, img): | |
|         """ | |
|         Function to do segmentation for the retrieved data after converting it | |
|         into image. | |
|         :param img: The image format of the document that need to undergo the | |
|         segmentation procedure. | |
|         :return: The segments of the image. | |
|         """ | |
|         img = ImageOps.grayscale(img) | |
|         img = img.point(lambda x: 255 if x > 176 else 0, '1') | |
|         img_rgb = ImageOps.invert(img.convert("RGB")) | |
|         segments = [] | |
|         segment_bounds = img_rgb.getbbox() | |
|         while segment_bounds: | |
|             segment = img_rgb.crop(segment_bounds) | |
|             if segment.size[0] > 0 and segment.size[1] > 0: | |
|                 segments.append(segment) | |
|             img_rgb = ImageOps.crop(img_rgb, segment_bounds) | |
|             segment_bounds = img_rgb.getbbox() | |
|         return segments | |
| 
 | |
|     def action_get_data(self): | |
|         """ | |
|         Function to get the files in .jpg, .jpeg, .png and .pdf formats. | |
|         """ | |
|         self.flag = True | |
|         split_tup = os.path.splitext(self.file_name) | |
|         try: | |
|             # Getting the file path from ir.attachments. | |
|             file_attachment = self.env["ir.attachment"].search( | |
|                 ['|', ('res_field', '!=', False), ('res_field', '=', False), | |
|                  ('res_id', '=', self.id), | |
|                  ('res_model', '=', 'ocr.data.template')], | |
|                 limit=1) | |
|             file_path = file_attachment._full_path(file_attachment.store_fname) | |
|             segmented_data = [] | |
|             # Reading files in the format .jpg, .jpeg and .png. | |
|             if split_tup[1] == '.jpg' or split_tup[1] == '.jpeg' or split_tup[ | |
|                  1] == '.png': | |
|                 with open(file_path, mode='rb') as f: | |
|                     binary_data = f.read() | |
|                 img = Image.open(io.BytesIO(binary_data)) | |
|                 # Calling the function to do segmentation. | |
|                 segmented_data = self.data_segmentation(img) | |
|             elif split_tup[1] == '.pdf': | |
|                 # Reading files in the format .pdf. | |
|                 with open(file_path, mode='rb') as f: | |
|                     pdf_data = f.read() | |
|                 pages = convert_from_bytes(pdf_data) | |
|                 # Making the contents in 2 or more pages into combined page. | |
|                 max_width = max(page.width for page in pages) | |
|                 total_height = sum(page.height for page in pages) | |
|                 resized_images = [page.resize((2400, 1800)) for page in pages] | |
|                 combined_image = Image.new('RGB', (max_width, total_height)) | |
|                 y_offset = 0 | |
|                 for resized_page in resized_images: | |
|                     combined_image.paste(resized_page, (0, y_offset)) | |
|                     y_offset += resized_page.height | |
|                 # Calling the segmentation function. | |
|                 segmented_data = self.data_segmentation(combined_image) | |
|         except Exception: | |
|             self.env['ocr.data.template'].search([], order="id desc", | |
|                                                  limit=1).unlink() | |
|             raise ValidationError(_("Cannot identify data")) | |
|         # Converting the segmented image into text using pytesseract. | |
|         text = "" | |
|         for segment in segmented_data: | |
|             try: | |
|                 text += pytesseract.image_to_string(segment) + "\n" | |
|                 break | |
|             except Exception: | |
|                 raise ValidationError(_("Data cannot be read")) | |
|         # Assigning retrieved data into text field. | |
|         self.data = text | |
| 
 | |
|     @api.onchange('model_name_id') | |
|     def _onchange_model_name_id(self): | |
|         """ Function to update the Many2many field to empty """ | |
|         self.write({'model_field_ids': [(6, 0, [])]}) | |
| 
 | |
|     def find_person_name(self): | |
|         """ | |
|         Function to find person name from the retrieved text using 'spacy' | |
|         """ | |
|         person = '' | |
|         nlp = spacy.load("en_core_web_sm") | |
|         doc = nlp(self.data) | |
|         for entity in doc.ents: | |
|             if entity.label_ == "PERSON": | |
|                 person = entity.text | |
|                 break | |
|         return person | |
| 
 | |
|     def get_order_line(self, text): | |
|         """ | |
|         Function to find product lines from retrieved data using regex. | |
|         :param text: The extracted text to find the order lines from it | |
|         :return: The order lines found from text | |
|         """ | |
|         product_line_list = [] | |
|         quantities = [] | |
|         unit_prices = [] | |
|         product_regex = r'\[?(.+?)\]?\s*(.+)\n(?:HSN/SAC Code):\s+(\d+)' | |
|         quantity_regex = r"Quantity Unit\n([\d.\s\S]+)" | |
|         unit_price_regex = r"Amount\n([\d.\s\S]+)" | |
|         # Matching the pattern with the data. | |
|         quantity_match = re.search(quantity_regex, text) | |
|         price_match = re.search(unit_price_regex, text) | |
|         if quantity_match: | |
|             quantity_unit_text = quantity_match.group(1) | |
|             # If matched finding a particular pattern for quantities | |
|             # form that group. | |
|             quantities = re.findall(r"\d+\.\d+", quantity_unit_text) | |
|         if price_match: | |
|             price_unit_text = price_match.group(1) | |
|             # If matched finding a particular pattern for unit price | |
|             # form that group. | |
|             unit_prices = re.findall(r"\d+\.\d+", price_unit_text) | |
|         # Finding the data that matches the pattern for products. | |
|         products = re.findall(product_regex, text) | |
|         number_of_product = len(products) | |
|         number_of_qty = len(quantities) | |
|         number_of_price = len(unit_prices) | |
|         # Getting the products and its corresponding quantity and price. | |
|         if number_of_product == number_of_qty == number_of_price: | |
|             product_line_list = [ | |
|                 {'product': products[i], 'quantity': quantities[i], | |
|                  'price': unit_prices[i]} | |
|                 for i in range(number_of_product)] | |
|         elif number_of_product == number_of_qty: | |
|             product_line_list = [ | |
|                 {'product': products[i], 'quantity': quantities[i]} | |
|                 for i in range(number_of_product)] | |
|         elif number_of_product == number_of_price: | |
|             product_line_list = [ | |
|                 {'product': products[i], 'price': unit_prices[i]} | |
|                 for i in range(number_of_product)] | |
|         elif products: | |
|             product_line_list = [{'product': products[i]} for i in range(number_of_product)] | |
|         return product_line_list | |
| 
 | |
|     def action_process_data(self): | |
|         """ | |
|         Function to process the data after fetching it. | |
|         The fetched data are mapping into some models. | |
|         """ | |
|         phone_number = '' | |
|         email_address = '' | |
|         person = '' | |
|         phone_pattern = r'\(\d{3}\) \d{3}-\d{4}|\d{3}-\d{3}-\d{4}|\+\d{1}-\d{3}-\d{3}-\d{4}|\d{11}|P \+\d{3} \d{6}' | |
|         email_pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}' | |
|         if self.model_name_id.name == 'Contact': | |
|             # Mapping the data into Contact module by fetching person name, | |
|             # phone number and email id from data. | |
|             field_value = False | |
|             non_field_count = 0 | |
|             for field in self.model_field_ids: | |
|                 if field.name == 'name' or field.name == 'display_name': | |
|                     person = self.find_person_name() | |
|                     if not person: | |
|                         raise ValidationError(_("Partner name cannot find")) | |
|                     field_value = True | |
|                 elif field.name == 'phone': | |
|                     phone = re.findall(phone_pattern, self.data) | |
|                     if phone: | |
|                         phone_number = phone[0] | |
|                 elif field.name == 'email': | |
|                     email = re.findall(email_pattern, self.data) | |
|                     if email: | |
|                         email_address = email[0] | |
|                 else: | |
|                     non_field_count = 1 | |
|             if not field_value and non_field_count == 1: | |
|                 raise ValidationError(_("No data to map into the field")) | |
|             if person: | |
|                 partner = self.env['res.partner'].search( | |
|                     [('name', '=', person)], limit=1) | |
|                 if not partner: | |
|                     # Creating record in res.partner. | |
|                     partner_record = self.env['res.partner'].create({ | |
|                         'name': person, | |
|                         'email': email_address, | |
|                         'phone': phone_number | |
|                     }) | |
|                 else: | |
|                     raise ValidationError(_("Partner already exist")) | |
|             else: | |
|                 raise ValidationError(_("Name field is not chosen to create" | |
|                                         " partner")) | |
|             if partner_record: | |
|                 return { | |
|                     'name': "Partner", | |
|                     'type': 'ir.actions.act_window', | |
|                     'view_type': 'form', | |
|                     'view_mode': 'form', | |
|                     'res_model': 'res.partner', | |
|                     'res_id': partner_record.id, | |
|                     'view_id': self.env.ref('base.view_partner_form').id, | |
|                     'target': 'current', | |
|                 } | |
|         elif self.model_name_id.name == 'Journal Entry': | |
|             # Mapping data into Journal Entry. Creating a record in vendor bill | |
|             vendor_bill_flag = False | |
|             for field in self.model_field_ids: | |
|                 # Taking the file path from ir.attachment. | |
|                 if field.name == 'invoice_vendor_bill_id': | |
|                     try: | |
|                         file_attachment = self.env["ir.attachment"].search( | |
|                             ['|', ('res_field', '!=', False), | |
|                              ('res_field', '=', False), | |
|                              ('res_id', '=', self.id), | |
|                              ('res_model', '=', 'ocr.data.template')], | |
|                             limit=1) | |
|                         file_path = file_attachment._full_path( | |
|                             file_attachment.store_fname) | |
|                         with open(file_path, mode='rb') as f: | |
|                             binary_data = f.read() | |
|                         img = Image.open(io.BytesIO(binary_data)) | |
|                         # Resizing the image to improve the clarity. | |
|                         resized_img = img.resize( | |
|                             (img.width * 2, img.height * 2), | |
|                             resample=Image.BICUBIC) | |
|                     except Exception: | |
|                         raise ValidationError(_("Can't create vendor bill")) | |
|                     # Converting the image into text using OCR python package | |
|                     # pytesseract. | |
|                     try: | |
|                         text = pytesseract.image_to_string(resized_img) | |
|                     except Exception: | |
|                         raise ValidationError(_("Can't create vendor bill")) | |
|                     bill = self.env['digitize.bill'] | |
|                     # Calling the function to create vendor bill | |
|                     # from model digitize.bill. | |
|                     bill_record = bill.create_record(text) | |
|                     return { | |
|                         'name': "Bill", | |
|                         'type': 'ir.actions.act_window', | |
|                         'view_type': 'form', | |
|                         'view_mode': 'form', | |
|                         'res_model': 'account.move', | |
|                         'res_id': bill_record.id, | |
|                         'view_id': self.env.ref('account.view_move_form').id, | |
|                         'target': 'current', | |
|                     } | |
|             if not vendor_bill_flag: | |
|                 raise ValidationError(_("No data to map into the field")) | |
|         elif self.model_name_id.name == 'Employee': | |
|             # Mapping the data into Employee module by fetching person name, | |
|             # phone number and email. | |
|             field_value = False | |
|             non_field_count = 0 | |
|             for field in self.model_field_ids: | |
|                 if field.name == 'name' or field.name == 'display_name' or \ | |
|                         field.name == 'emergency_contact': | |
|                     person = self.find_person_name() | |
|                     if not person: | |
|                         raise ValidationError(_("Employee name cannot find")) | |
|                     field_value = True | |
|                 elif field.name == 'work_phone' or field.name == 'phone' or \ | |
|                         field.name == 'emergency_phone': | |
|                     phone = re.findall(phone_pattern, self.data) | |
|                     if phone: | |
|                         phone_number = phone[0] | |
|                 elif field.name == 'private_email' or \ | |
|                         field.name == 'work_email': | |
|                     email = re.findall(email_pattern, self.data) | |
|                     if email: | |
|                         email_address = email[0] | |
|                 else: | |
|                     non_field_count = 1 | |
|             if not field_value and non_field_count == 1: | |
|                 raise ValidationError(_("No data to map into the field")) | |
|             if person: | |
|                 partner = self.env['hr.employee'].search( | |
|                     [('name', '=', person)], limit=1) | |
|                 if not partner: | |
|                     # Creating a record in hr.employee by mapping the | |
|                     # data into employee name, work phone and work email. | |
|                     employee_record = self.env['hr.employee'].create({ | |
|                         'name': person, | |
|                         'work_email': email_address, | |
|                         'work_phone': phone_number | |
|                     }) | |
|                 else: | |
|                     raise ValidationError(_("Employee already exist")) | |
|             else: | |
|                 raise ValidationError( | |
|                     _("Name field is not chosen to create employee")) | |
|             if employee_record: | |
|                 return { | |
|                     'name': "Employee", | |
|                     'type': 'ir.actions.act_window', | |
|                     'view_type': 'form', | |
|                     'view_mode': 'form', | |
|                     'res_model': 'hr.employee', | |
|                     'res_id': employee_record.id, | |
|                     'view_id': self.env.ref('hr.view_employee_form').id, | |
|                     'target': 'current', | |
|                 } | |
|         elif self.model_name_id.name == 'Expense': | |
|             # Mapping the data into Expense module. | |
|             expense_product = False | |
|             for field in self.model_field_ids: | |
|                 if field.name == 'name' or field.name == 'product_id': | |
|                     product = self.env['product.product'].search( | |
|                         [('name', '=', 'BILL EXPENSE')], limit=1) | |
|                     if not product: | |
|                         product = self.env['product.product'].create({ | |
|                             'name': 'BILL EXPENSE', | |
|                         }) | |
|                     pattern = r'\b\d+(?:\.\d{1,2})?\b' | |
|                     matches = re.findall(pattern, self.data) | |
|                     total_amount = float(matches[0]) if matches else 0.0 | |
|                     expense_record = self.env['hr.expense'].create({ | |
|                         'name': product.name, | |
|                         'product_id': product.id, | |
|                         'unit_amount': total_amount | |
|                     }) | |
|                     return { | |
|                         'name': "Expense", | |
|                         'type': 'ir.actions.act_window', | |
|                         'view_type': 'form', | |
|                         'view_mode': 'form', | |
|                         'res_model': 'hr.expense', | |
|                         'res_id': expense_record.id, | |
|                         'view_id': self.env.ref( | |
|                             'hr_expense.hr_expense_view_form').id, | |
|                         'target': 'current', | |
|                     } | |
|             if not expense_product: | |
|                 raise ValidationError(_("Can't create an expense without " | |
|                                         "description or category")) | |
|         elif self.model_name_id.name == 'Sales Order': | |
|             # Mapping the data from PDF with proper format into Sale Order. | |
|             partner = False | |
|             sale_order = '' | |
|             field_value = False | |
|             non_field_value = 0 | |
|             for field in self.model_field_ids: | |
|                 if field.name == 'order_line': | |
|                     person = self.find_person_name() | |
|                     if person: | |
|                         partner = self.env['hr.employee'].search( | |
|                             [('name', '=', person)], limit=1) | |
|                         if not partner: | |
|                             partner = self.env['hr.employee'].create({ | |
|                                 'name': person, | |
|                             }) | |
|                     # Calling the function to get order lines. | |
|                     product_line = self.get_order_line(self.data) | |
|                     sale_order = self.env['sale.order'].create({ | |
|                         'partner_id': partner.id, | |
|                     }) | |
|                     if product_line: | |
|                         for item in product_line: | |
|                             if 'quantity' not in item.keys(): | |
|                                 item.update({'quantity': 0}) | |
|                             if 'price' not in item.keys(): | |
|                                 item.update({'price': 0}) | |
|                             product = self.env['product.product'].search( | |
|                                 [('name', '=', item['product'])], limit=1) | |
|                             if not product: | |
|                                 product = self.env['product.product'].create({ | |
|                                     'name': item['product'] | |
|                                 }) | |
|                             item.update({'product': product.id}) | |
|                             self.env['sale.order.line'].create({ | |
|                                 'order_id': sale_order.id, | |
|                                 'product_id': item['product'], | |
|                                 'product_uom_qty': item['quantity'], | |
|                                 'price_unit': item['price'] | |
|                             }) | |
|                 else: | |
|                     non_field_value = 1 | |
|                 if sale_order: | |
|                     return { | |
|                         'name': "Sale order", | |
|                         'type': 'ir.actions.act_window', | |
|                         'view_type': 'form', | |
|                         'view_mode': 'form', | |
|                         'res_model': 'sale.order', | |
|                         'res_id': sale_order.id, | |
|                         'view_id': self.env.ref('sale.view_order_form').id, | |
|                         'target': 'current', | |
|                     } | |
|             if not field_value and non_field_value == 1: | |
|                 raise ValidationError(_("No data to map into the field")) | |
|         elif self.model_name_id.name == 'Purchase Order': | |
|             # Mapping the data from PDF with proper format into Purchase Order. | |
|             field_value = False | |
|             purchase_order = '' | |
|             non_field_value = 0 | |
|             partner = False | |
|             for field in self.model_field_ids: | |
|                 if field.name == 'order_line': | |
|                     person = self.find_person_name() | |
|                     if person: | |
|                         partner = self.env['hr.employee'].search( | |
|                             [('name', '=', person)], limit=1) | |
|                         if not partner: | |
|                             partner = self.env['hr.employee'].create({ | |
|                                 'name': person, | |
|                             }) | |
|                     # Calling the function to get order lines. | |
|                     product_line = self.get_order_line(self.data) | |
|                     purchase_order = self.env['purchase.order'].create({ | |
|                         'partner_id': partner.id, | |
|                     }) | |
|                     if product_line: | |
|                         for item in product_line: | |
|                             if 'quantity' not in item.keys(): | |
|                                 item.update({'quantity': 0}) | |
|                             if 'price' not in item.keys(): | |
|                                 item.update({'price': 0}) | |
|                             product = self.env['product.product'].search( | |
|                                 [('name', '=', item['product'])], limit=1) | |
|                             if not product: | |
|                                 product = self.env['product.product'].create({ | |
|                                     'name': item['product'] | |
|                                 }) | |
|                             item.update({'product': product.id}) | |
|                             self.env['purchase.order.line'].create({ | |
|                                 'order_id': purchase_order.id, | |
|                                 'product_id': item['product'], | |
|                                 'product_uom_qty': item['quantity'], | |
|                                 'price_unit': item['price'] | |
|                             }) | |
|                 else: | |
|                     non_field_value = 1 | |
|                 if purchase_order: | |
|                     return { | |
|                         'name': "Purchase order", | |
|                         'type': 'ir.actions.act_window', | |
|                         'view_type': 'form', | |
|                         'view_mode': 'form', | |
|                         'res_model': 'purchase.order', | |
|                         'res_id': purchase_order.id, | |
|                         'view_id': self.env.ref( | |
|                             'purchase.purchase_order_form').id, | |
|                         'target': 'current', | |
|                     } | |
|             if not field_value and non_field_value == 1: | |
|                 raise ValidationError(_("No data to map into the field")) | |
| 
 | |
|     @api.onchange('image') | |
|     def _onchange_image(self): | |
|         self.write({ | |
|             'image2': self.image | |
|         })
 | |
| 
 |