diff --git a/ocr_data_retrieval/README.rst b/ocr_data_retrieval/README.rst new file mode 100755 index 000000000..715af2420 --- /dev/null +++ b/ocr_data_retrieval/README.rst @@ -0,0 +1,48 @@ +.. image:: https://img.shields.io/badge/licence-LGPL--3-green.svg + :target: https://www.gnu.org/licenses/lgpl-3.0-standalone.html + :alt: License: LGPL-3 + +OCR Data Retrieval +================== +This module allows users to retrieve data from scanned documents and images. + +Configuration +============= +- www.odoo.com/documentation/15.0/setup/install.html +- Install our custom addon +- Install pdf2image, PIL, pytesseract, spacy (version 3.5.2), en_core_web_sm + +Company +------- +* `Cybrosys Techno Solutions `__ + +License +------- +GNU Lesser General Public License, Version 3 (LGPL v3). +(https://www.gnu.org/licenses/lgpl-3.0-standalone.html) + +Credits +------- +Developer: (V15) Sruthi Renjith, Contact: odoo@cybrosys.com + +Contacts +-------- +* Mail Contact : odoo@cybrosys.com +* Website : https://cybrosys.com + +Bug Tracker +----------- +Bugs are tracked on GitHub Issues. In case of trouble, please check there if your issue has already been reported. + +Maintainer +========== +.. image:: https://cybrosys.com/images/logo.png + :target: https://cybrosys.com + +This module is maintained by Cybrosys Technologies. + +For support and more information, please visit `Our Website `__ + +Further information +=================== +HTML Description: ``__ \ No newline at end of file diff --git a/ocr_data_retrieval/__init__.py b/ocr_data_retrieval/__init__.py new file mode 100644 index 000000000..41ad3e5b0 --- /dev/null +++ b/ocr_data_retrieval/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. 
+# +# Copyright (C) 2024-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. +# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . +# +############################################################################### +from . import models diff --git a/ocr_data_retrieval/__manifest__.py b/ocr_data_retrieval/__manifest__.py new file mode 100644 index 000000000..360596cac --- /dev/null +++ b/ocr_data_retrieval/__manifest__.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. +# +# Copyright (C) 2024-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. +# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . 
+# +############################################################################### +{ + 'name': 'OCR Data Retrieval', + 'version': '15.0.1.0.0', + 'category': 'Productivity', + 'summary': """Data retrieval from scanned documents with .jpg, .jpeg, .png + and .pdf file format""", + 'description': """OCR data retrieval from scanned documents is the process + of extracting machine-readable text and potentially other relevant + information from physical or digital images of documents.""", + 'author': "Cybrosys Techno Solutions", + 'company': 'Cybrosys Techno Solutions', + 'maintainer': 'Cybrosys Techno Solutions', + 'website': "https://www.cybrosys.com", + 'depends': ['base', 'hr_expense', 'bill_digitization', 'contacts', + 'purchase', 'sale_management'], + 'assets': { + 'web.assets_backend': [ + '/ocr_data_retrieval/static/src/js/image_field.js', + ], + }, + 'data': ['security/ir.model.access.csv', + 'views/ocr_data_template_views.xml'], + 'external_dependencies': { + 'python': ['pdf2image', 'PIL', 'pytesseract', 'spacy', + 'en_core_web_sm'] + }, + 'images': ['static/description/banner.jpg'], + 'license': 'LGPL-3', + 'installable': True, + 'auto_install': False, + 'application': False, +} diff --git a/ocr_data_retrieval/doc/RELEASE_NOTES.md b/ocr_data_retrieval/doc/RELEASE_NOTES.md new file mode 100755 index 000000000..045e17188 --- /dev/null +++ b/ocr_data_retrieval/doc/RELEASE_NOTES.md @@ -0,0 +1,5 @@ +## Module +#### 10.01.2024 +#### Version 15.0.1.0.0 +#### ADD +- Initial commit for OCR Data Retrieval diff --git a/ocr_data_retrieval/models/__init__.py b/ocr_data_retrieval/models/__init__.py new file mode 100644 index 000000000..2763b73d9 --- /dev/null +++ b/ocr_data_retrieval/models/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. 
+# +# Copyright (C) 2024-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. +# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . +# +############################################################################### +from . import ocr_data_template diff --git a/ocr_data_retrieval/models/ocr_data_template.py b/ocr_data_retrieval/models/ocr_data_template.py new file mode 100644 index 000000000..83b502023 --- /dev/null +++ b/ocr_data_retrieval/models/ocr_data_template.py @@ -0,0 +1,508 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. +# +# Copyright (C) 2024-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. +# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . 
class OCRDataTemplate(models.TransientModel):
    """Wizard that reads an uploaded JPG, JPEG, PNG or PDF document with OCR
    and maps the extracted text onto a record of a chosen model."""
    _name = "ocr.data.template"
    _description = "Data retrieving template"
    _rec_name = "file_name"

    # Patterns used to pick the first phone number / e-mail address out of
    # the OCR text.
    _PHONE_PATTERN = (r'\(\d{3}\) \d{3}-\d{4}|\d{3}-\d{3}-\d{4}|'
                      r'\+\d{1}-\d{3}-\d{3}-\d{4}|\d{11}|P \+\d{3} \d{6}')
    _EMAIL_PATTERN = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}'

    image = fields.Binary(string="Document", attachment=True, required=True,
                          help="Upload .jpg, .jpeg, .png or .pdf files")
    file_name = fields.Char(string="Document Name", help="Document name")
    image2 = fields.Image(string="Document",
                          help="Uploaded document")
    flag = fields.Boolean(default=False, string="Is Read",
                          help="Flag to check the document read or not")
    data = fields.Text(string="Data", readonly=True,
                       help="Content from the document")
    model_name_id = fields.Many2one(
        'ir.model', string="Model",
        domain="["
               "('model', 'in', ['res.partner', 'account.move', 'hr.employee',"
               " 'hr.expense', 'sale.order', 'purchase.order'])]",
        help="Model to which the data want to map")
    model_field_ids = fields.Many2many(
        'ir.model.fields', string="Fields",
        domain="[('model_id', '=', model_name_id)]",
        help="Fields names to map data")

    def data_segmentation(self, img):
        """
        Binarise the document image and cut it into segments.

        The image is converted to grayscale, thresholded at 176, inverted,
        and then repeatedly cropped to the bounding box of its non-zero
        pixels until no such box is left.

        :param img: PIL image of the document.
        :return: List of PIL images (segments); may be empty.
        """
        img = ImageOps.grayscale(img)
        img = img.point(lambda x: 255 if x > 176 else 0, '1')
        # Inverting makes the (dark) text non-zero so getbbox() finds it.
        img_rgb = ImageOps.invert(img.convert("RGB"))
        segments = []
        segment_bounds = img_rgb.getbbox()
        while segment_bounds:
            segment = img_rgb.crop(segment_bounds)
            if segment.size[0] > 0 and segment.size[1] > 0:
                segments.append(segment)
            # NOTE(review): Pillow documents the second argument of
            # ImageOps.crop as a border width, not a box; passing the
            # bounding box here looks unintended - confirm before relying
            # on any segment other than the first.
            img_rgb = ImageOps.crop(img_rgb, segment_bounds)
            segment_bounds = img_rgb.getbbox()
        return segments

    def _read_document_bytes(self):
        """Return the raw bytes of the attachment stored for this record."""
        # The ('res_field' != False) OR ('res_field' = False) pair is always
        # true; presumably it is there so attachments bound to a binary
        # field are not filtered out of the search - verify against
        # ir.attachment.
        file_attachment = self.env["ir.attachment"].search(
            ['|', ('res_field', '!=', False), ('res_field', '=', False),
             ('res_id', '=', self.id),
             ('res_model', '=', 'ocr.data.template')],
            limit=1)
        file_path = file_attachment._full_path(file_attachment.store_fname)
        with open(file_path, mode='rb') as document:
            return document.read()

    def action_get_data(self):
        """
        Run OCR on the uploaded .jpg, .jpeg, .png or .pdf document and store
        the recognised text in ``data``.

        Files with any other extension leave ``data`` empty.

        :raises ValidationError: if the document cannot be loaded or the
            OCR engine fails.
        """
        self.ensure_one()
        self.flag = True
        # Lower-case the extension so 'SCAN.PDF' is handled like 'scan.pdf';
        # a missing file name is treated as "no extension".
        extension = os.path.splitext(self.file_name or '')[1].lower()
        segmented_data = []
        try:
            binary_data = self._read_document_bytes()
            if extension in ('.jpg', '.jpeg', '.png'):
                img = Image.open(io.BytesIO(binary_data))
                segmented_data = self.data_segmentation(img)
            elif extension == '.pdf':
                pages = convert_from_bytes(binary_data)
                # Stack all pages vertically into one combined image.
                # NOTE(review): the canvas is sized from the original pages
                # while the pasted pages are resized to 2400x1800, so parts
                # of a page may fall outside the canvas - confirm intent.
                max_width = max(page.width for page in pages)
                total_height = sum(page.height for page in pages)
                resized_images = [page.resize((2400, 1800)) for page in pages]
                combined_image = Image.new('RGB', (max_width, total_height))
                y_offset = 0
                for resized_page in resized_images:
                    combined_image.paste(resized_page, (0, y_offset))
                    y_offset += resized_page.height
                segmented_data = self.data_segmentation(combined_image)
        except Exception as err:
            # No record is deleted here: picking "the newest record of the
            # model" could remove another user's wizard.
            raise ValidationError(_("Cannot identify data")) from err
        # Only the first segment is passed to pytesseract.
        text = ""
        if segmented_data:
            try:
                text = pytesseract.image_to_string(segmented_data[0]) + "\n"
            except Exception as err:
                raise ValidationError(_("Data cannot be read")) from err
        self.data = text

    @api.onchange('model_name_id')
    def _onchange_model_name_id(self):
        """ Empty the selected fields whenever the target model changes. """
        self.model_field_ids = [(6, 0, [])]

    def find_person_name(self):
        """
        Find a person name in the retrieved text using 'spacy'.

        :return: Text of the first entity labelled PERSON, or '' when there
            is no text or no such entity.
        """
        if not self.data:
            return ''
        nlp = spacy.load("en_core_web_sm")
        for entity in nlp(self.data).ents:
            if entity.label_ == "PERSON":
                return entity.text
        return ''

    def get_order_line(self, text):
        """
        Find product lines in the retrieved text using regular expressions.

        :param text: The extracted text to find the order lines in.
        :return: List of dicts with a 'product' key and, when the number of
            matches agrees with the number of products, 'quantity' and/or
            'price' keys (values are the matched strings).
        """
        text = text or ''
        product_regex = r'\[?(.+?)\]?\s*(.+)\n(?:HSN/SAC Code):\s+(\d+)'
        quantity_regex = r"Quantity Unit\n([\d.\s\S]+)"
        unit_price_regex = r"Amount\n([\d.\s\S]+)"
        quantities = []
        unit_prices = []
        quantity_match = re.search(quantity_regex, text)
        if quantity_match:
            # Decimal numbers from that group are taken as the quantities.
            quantities = re.findall(r"\d+\.\d+", quantity_match.group(1))
        price_match = re.search(unit_price_regex, text)
        if price_match:
            # Decimal numbers from that group are taken as the unit prices.
            unit_prices = re.findall(r"\d+\.\d+", price_match.group(1))
        # The pattern has three capture groups, so each entry is a 3-tuple.
        products = re.findall(product_regex, text)
        same_qty = len(products) == len(quantities)
        same_price = len(products) == len(unit_prices)
        if same_qty and same_price:
            return [{'product': product, 'quantity': qty, 'price': price}
                    for product, qty, price
                    in zip(products, quantities, unit_prices)]
        if same_qty:
            return [{'product': product, 'quantity': qty}
                    for product, qty in zip(products, quantities)]
        if same_price:
            return [{'product': product, 'price': price}
                    for product, price in zip(products, unit_prices)]
        return [{'product': product} for product in products]

    def _open_record_action(self, title, res_model, res_id, view_xmlid):
        """Build the window action opening one record in its form view."""
        return {
            'name': title,
            'type': 'ir.actions.act_window',
            'view_type': 'form',
            'view_mode': 'form',
            'res_model': res_model,
            'res_id': res_id,
            'view_id': self.env.ref(view_xmlid).id,
            'target': 'current',
        }

    def _extract_contact_details(self, name_fields, phone_fields,
                                 email_fields, missing_name_message):
        """
        Collect person name, phone and e-mail for the selected fields.

        :param name_fields: Field names that trigger the person lookup.
        :param phone_fields: Field names that trigger the phone lookup.
        :param email_fields: Field names that trigger the e-mail lookup.
        :param missing_name_message: Error text used when a name field is
            selected but no person is found.
        :return: Tuple ``(person, phone_number, email_address)``; entries
            are '' when not selected or not found.
        :raises ValidationError: if a selected name cannot be found, or if
            only unsupported fields are selected.
        """
        person = phone_number = email_address = ''
        text = self.data or ''
        name_selected = False
        unsupported_selected = False
        for field in self.model_field_ids:
            if field.name in name_fields:
                person = self.find_person_name()
                if not person:
                    raise ValidationError(missing_name_message)
                name_selected = True
            elif field.name in phone_fields:
                phones = re.findall(self._PHONE_PATTERN, text)
                if phones:
                    phone_number = phones[0]
            elif field.name in email_fields:
                emails = re.findall(self._EMAIL_PATTERN, text)
                if emails:
                    email_address = emails[0]
            else:
                unsupported_selected = True
        if not name_selected and unsupported_selected:
            raise ValidationError(_("No data to map into the field"))
        return person, phone_number, email_address

    def _process_contact(self):
        """Create a res.partner from the text and open it."""
        person, phone_number, email_address = self._extract_contact_details(
            ('name', 'display_name'), ('phone',), ('email',),
            _("Partner name cannot find"))
        if not person:
            raise ValidationError(_("Name field is not chosen to create"
                                    " partner"))
        partners = self.env['res.partner']
        if partners.search([('name', '=', person)], limit=1):
            raise ValidationError(_("Partner already exist"))
        partner_record = partners.create({
            'name': person,
            'email': email_address,
            'phone': phone_number
        })
        return self._open_record_action(
            "Partner", 'res.partner', partner_record.id,
            'base.view_partner_form')

    def _process_vendor_bill(self):
        """Create a vendor bill through digitize.bill and open it."""
        if not any(field.name == 'invoice_vendor_bill_id'
                   for field in self.model_field_ids):
            raise ValidationError(_("No data to map into the field"))
        try:
            img = Image.open(io.BytesIO(self._read_document_bytes()))
            # Resizing the image to improve the clarity.
            resized_img = img.resize(
                (img.width * 2, img.height * 2),
                resample=Image.BICUBIC)
            text = pytesseract.image_to_string(resized_img)
        except Exception as err:
            raise ValidationError(_("Can't create vendor bill")) from err
        # create_record is provided by the digitize.bill model.
        bill_record = self.env['digitize.bill'].create_record(text)
        return self._open_record_action(
            "Bill", 'account.move', bill_record.id,
            'account.view_move_form')

    def _process_employee(self):
        """Create an hr.employee from the text and open it."""
        person, phone_number, email_address = self._extract_contact_details(
            ('name', 'display_name', 'emergency_contact'),
            ('work_phone', 'phone', 'emergency_phone'),
            ('private_email', 'work_email'),
            _("Employee name cannot find"))
        if not person:
            raise ValidationError(
                _("Name field is not chosen to create employee"))
        employees = self.env['hr.employee']
        if employees.search([('name', '=', person)], limit=1):
            raise ValidationError(_("Employee already exist"))
        employee_record = employees.create({
            'name': person,
            'work_email': email_address,
            'work_phone': phone_number
        })
        return self._open_record_action(
            "Employee", 'hr.employee', employee_record.id,
            'hr.view_employee_form')

    def _process_expense(self):
        """Create an hr.expense for the 'BILL EXPENSE' product and open it."""
        if not any(field.name in ('name', 'product_id')
                   for field in self.model_field_ids):
            raise ValidationError(_("Can't create an expense without "
                                    "description or category"))
        products = self.env['product.product']
        product = products.search([('name', '=', 'BILL EXPENSE')], limit=1)
        if not product:
            product = products.create({
                'name': 'BILL EXPENSE',
            })
        # The first number found in the text is used as the amount.
        matches = re.findall(r'\b\d+(?:\.\d{1,2})?\b', self.data or '')
        total_amount = float(matches[0]) if matches else 0.0
        expense_record = self.env['hr.expense'].create({
            'name': product.name,
            'product_id': product.id,
            'unit_amount': total_amount
        })
        return self._open_record_action(
            "Expense", 'hr.expense', expense_record.id,
            'hr_expense.hr_expense_view_form')

    def _process_order(self, order_model, line_model, quantity_field, title,
                       view_xmlid):
        """
        Create an order with its lines from the text and open it.

        :param order_model: 'sale.order' or 'purchase.order'.
        :param line_model: Matching order line model.
        :param quantity_field: Name of the quantity field on the line model.
        :param title: Title of the returned window action.
        :param view_xmlid: XML id of the order form view.
        :return: Window action, or None when no field is selected.
        :raises ValidationError: if only unsupported fields are selected or
            no partner name is found in the text.
        """
        field_names = [field.name for field in self.model_field_ids]
        if 'order_line' not in field_names:
            if field_names:
                raise ValidationError(_("No data to map into the field"))
            return None
        person = self.find_person_name()
        if not person:
            raise ValidationError(_("Partner name cannot find"))
        # partner_id of an order refers to res.partner.
        partners = self.env['res.partner']
        partner = partners.search([('name', '=', person)], limit=1)
        if not partner:
            partner = partners.create({
                'name': person,
            })
        order = self.env[order_model].create({
            'partner_id': partner.id,
        })
        products = self.env['product.product']
        for item in self.get_order_line(self.data):
            # NOTE(review): item['product'] is the tuple of regex groups
            # returned by get_order_line, not a plain name - confirm which
            # group should become the product name.
            product = products.search(
                [('name', '=', item['product'])], limit=1)
            if not product:
                product = products.create({
                    'name': item['product']
                })
            self.env[line_model].create({
                'order_id': order.id,
                'product_id': product.id,
                quantity_field: item.get('quantity', 0),
                'price_unit': item.get('price', 0)
            })
        return self._open_record_action(
            title, order_model, order.id, view_xmlid)

    def _process_sale_order(self):
        """Create a sale.order from the text and open it."""
        return self._process_order(
            'sale.order', 'sale.order.line', 'product_uom_qty',
            "Sale order", 'sale.view_order_form')

    def _process_purchase_order(self):
        """Create a purchase.order from the text and open it."""
        return self._process_order(
            'purchase.order', 'purchase.order.line', 'product_qty',
            "Purchase order", 'purchase.purchase_order_form')

    def action_process_data(self):
        """
        Map the fetched data onto a new record of the selected model.

        The target is chosen by the technical model name, so the result
        does not depend on the language of the model description.

        :return: Window action opening the created record, or None when
            nothing was created.
        :raises ValidationError: if the text or the selected fields do not
            allow a record to be created.
        """
        self.ensure_one()
        handlers = {
            'res.partner': self._process_contact,
            'account.move': self._process_vendor_bill,
            'hr.employee': self._process_employee,
            'hr.expense': self._process_expense,
            'sale.order': self._process_sale_order,
            'purchase.order': self._process_purchase_order,
        }
        handler = handlers.get(self.model_name_id.model)
        return handler() if handler else None

    @api.onchange('image')
    def _onchange_image(self):
        """ Mirror the uploaded document into the preview field. """
        self.image2 = self.image
a/ocr_data_retrieval/static/description/assets/demo/purchase_order.pdf b/ocr_data_retrieval/static/description/assets/demo/purchase_order.pdf new file mode 100644 index 000000000..96096d802 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/demo/purchase_order.pdf differ diff --git a/ocr_data_retrieval/static/description/assets/demo/sale_order.pdf b/ocr_data_retrieval/static/description/assets/demo/sale_order.pdf new file mode 100644 index 000000000..7fd5ad1df Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/demo/sale_order.pdf differ diff --git a/ocr_data_retrieval/static/description/assets/icons/cogs.png b/ocr_data_retrieval/static/description/assets/icons/cogs.png new file mode 100755 index 000000000..95d0bad62 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/cogs.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/consultation.png b/ocr_data_retrieval/static/description/assets/icons/consultation.png new file mode 100755 index 000000000..8319d4baa Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/consultation.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/ecom-black.png b/ocr_data_retrieval/static/description/assets/icons/ecom-black.png new file mode 100755 index 000000000..a9385ff13 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/ecom-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/education-black.png b/ocr_data_retrieval/static/description/assets/icons/education-black.png new file mode 100755 index 000000000..3eb09b27b Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/education-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/hotel-black.png b/ocr_data_retrieval/static/description/assets/icons/hotel-black.png new file mode 100755 index 000000000..130f613be Binary files /dev/null 
and b/ocr_data_retrieval/static/description/assets/icons/hotel-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/license.png b/ocr_data_retrieval/static/description/assets/icons/license.png new file mode 100755 index 000000000..a5869797e Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/license.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/lifebuoy.png b/ocr_data_retrieval/static/description/assets/icons/lifebuoy.png new file mode 100755 index 000000000..658d56ccc Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/lifebuoy.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/manufacturing-black.png b/ocr_data_retrieval/static/description/assets/icons/manufacturing-black.png new file mode 100755 index 000000000..697eb0e9f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/manufacturing-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/pos-black.png b/ocr_data_retrieval/static/description/assets/icons/pos-black.png new file mode 100755 index 000000000..97c0f90c1 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/pos-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/puzzle.png b/ocr_data_retrieval/static/description/assets/icons/puzzle.png new file mode 100755 index 000000000..65cf854e7 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/puzzle.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/restaurant-black.png b/ocr_data_retrieval/static/description/assets/icons/restaurant-black.png new file mode 100755 index 000000000..4a35eb939 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/restaurant-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/service-black.png 
b/ocr_data_retrieval/static/description/assets/icons/service-black.png new file mode 100755 index 000000000..301ab51cb Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/service-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/trading-black.png b/ocr_data_retrieval/static/description/assets/icons/trading-black.png new file mode 100755 index 000000000..9398ba2f1 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/trading-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/training.png b/ocr_data_retrieval/static/description/assets/icons/training.png new file mode 100755 index 000000000..884ca024d Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/training.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/update.png b/ocr_data_retrieval/static/description/assets/icons/update.png new file mode 100755 index 000000000..ecbc5a01a Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/update.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/user.png b/ocr_data_retrieval/static/description/assets/icons/user.png new file mode 100755 index 000000000..6ffb23d9f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/user.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/wrench.png b/ocr_data_retrieval/static/description/assets/icons/wrench.png new file mode 100755 index 000000000..6c04dea0f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/wrench.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/categories.png b/ocr_data_retrieval/static/description/assets/misc/categories.png new file mode 100755 index 000000000..bedf1e0b1 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/categories.png differ diff --git 
a/ocr_data_retrieval/static/description/assets/misc/check-box.png b/ocr_data_retrieval/static/description/assets/misc/check-box.png new file mode 100755 index 000000000..42caf24b9 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/check-box.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/compass.png b/ocr_data_retrieval/static/description/assets/misc/compass.png new file mode 100755 index 000000000..d5fed8faa Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/compass.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/config.png b/ocr_data_retrieval/static/description/assets/misc/config.png new file mode 100755 index 000000000..71915e76c Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/config.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/corporate.png b/ocr_data_retrieval/static/description/assets/misc/corporate.png new file mode 100755 index 000000000..2eb13edbf Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/corporate.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/customer-support.png b/ocr_data_retrieval/static/description/assets/misc/customer-support.png new file mode 100755 index 000000000..79efc72ed Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/customer-support.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/features.png b/ocr_data_retrieval/static/description/assets/misc/features.png new file mode 100755 index 000000000..b41769f77 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/features.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/logo.png b/ocr_data_retrieval/static/description/assets/misc/logo.png new file mode 100644 index 000000000..cc3cc0ccf Binary files /dev/null and 
b/ocr_data_retrieval/static/description/assets/misc/logo.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/pictures.png b/ocr_data_retrieval/static/description/assets/misc/pictures.png new file mode 100755 index 000000000..56d255fe9 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/pictures.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/pie-chart.png b/ocr_data_retrieval/static/description/assets/misc/pie-chart.png new file mode 100755 index 000000000..426e05244 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/pie-chart.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/right-arrow.png b/ocr_data_retrieval/static/description/assets/misc/right-arrow.png new file mode 100755 index 000000000..730984a06 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/right-arrow.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/star.png b/ocr_data_retrieval/static/description/assets/misc/star.png new file mode 100755 index 000000000..2eb9ab29f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/star.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/support.png b/ocr_data_retrieval/static/description/assets/misc/support.png new file mode 100755 index 000000000..4f18b8b82 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/support.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/whatsapp.png b/ocr_data_retrieval/static/description/assets/misc/whatsapp.png new file mode 100755 index 000000000..d513a5356 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/whatsapp.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/1.png b/ocr_data_retrieval/static/description/assets/modules/1.png new file mode 100644 index 000000000..922179d26 Binary files /dev/null 
and b/ocr_data_retrieval/static/description/assets/modules/1.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/2.png b/ocr_data_retrieval/static/description/assets/modules/2.png new file mode 100644 index 000000000..f01b10060 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/2.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/3.png b/ocr_data_retrieval/static/description/assets/modules/3.png new file mode 100644 index 000000000..3dc1faa62 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/3.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/4.png b/ocr_data_retrieval/static/description/assets/modules/4.png new file mode 100644 index 000000000..996801f09 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/4.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/5.png b/ocr_data_retrieval/static/description/assets/modules/5.png new file mode 100644 index 000000000..a845426b0 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/5.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/6.jpg b/ocr_data_retrieval/static/description/assets/modules/6.jpg new file mode 100644 index 000000000..960808f42 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/6.jpg differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/contact_mapping.png b/ocr_data_retrieval/static/description/assets/screenshots/contact_mapping.png new file mode 100644 index 000000000..501741329 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/contact_mapping.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/employee_mapping.png b/ocr_data_retrieval/static/description/assets/screenshots/employee_mapping.png new file mode 100644 index 
000000000..7b262fc00 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/employee_mapping.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/expense_mapping.png b/ocr_data_retrieval/static/description/assets/screenshots/expense_mapping.png new file mode 100644 index 000000000..077bd545f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/expense_mapping.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/hero.gif b/ocr_data_retrieval/static/description/assets/screenshots/hero.gif new file mode 100644 index 000000000..96687f4aa Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/hero.gif differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/ocr_menu.png b/ocr_data_retrieval/static/description/assets/screenshots/ocr_menu.png new file mode 100644 index 000000000..919edf5c0 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/ocr_menu.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/purchase_mapping.png b/ocr_data_retrieval/static/description/assets/screenshots/purchase_mapping.png new file mode 100644 index 000000000..6e7047f2f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/purchase_mapping.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/upload_form.png b/ocr_data_retrieval/static/description/assets/screenshots/upload_form.png new file mode 100644 index 000000000..66703ffa4 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/upload_form.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/vendor_bill.png b/ocr_data_retrieval/static/description/assets/screenshots/vendor_bill.png new file mode 100644 index 000000000..5e3c76560 Binary files /dev/null and 
b/ocr_data_retrieval/static/description/assets/screenshots/vendor_bill.png differ diff --git a/ocr_data_retrieval/static/description/banner.jpg b/ocr_data_retrieval/static/description/banner.jpg new file mode 100644 index 000000000..0dd0f197d Binary files /dev/null and b/ocr_data_retrieval/static/description/banner.jpg differ diff --git a/ocr_data_retrieval/static/description/icon.png b/ocr_data_retrieval/static/description/icon.png new file mode 100644 index 000000000..7070b712b Binary files /dev/null and b/ocr_data_retrieval/static/description/icon.png differ diff --git a/ocr_data_retrieval/static/description/index.html b/ocr_data_retrieval/static/description/index.html new file mode 100755 index 000000000..0d7bcb42e --- /dev/null +++ b/ocr_data_retrieval/static/description/index.html @@ -0,0 +1,746 @@ +
+ +
+ +
+
+ Community +
+
+
+ + +
+
+
+ +

+ OCR Data Retrieval

+

+ Retrieving the Contents from Images and PDF Files

+
+ + +
+
+
+ + +
+
+ +
+

+ Explore This + Module

+
+ + + + +
+
+ +
+

+ Overview +

+
+
+
+ OCR data retrieval module allows users to read data from images and PDF + files. + The module makes use of OCR (Optical Character Recognition) that enables + the extraction of text from scanned documents. +
+
+ + + +
+
+ +
+

+ Configuration +

+
+
+
+ Need to install the following Python packages + 1. pdf2image (pip install pdf2image) + 2. PIL, provided by the Pillow package (pip3 install Pillow) + 3. pytesseract (pip3 install pytesseract) + 4. spacy (pip3 install spacy==3.5.2) + 5. en_core_web_sm (python3.8 -m spacy download en_core_web_sm) + 6. tesseract-ocr (sudo apt-get update && sudo apt-get install tesseract-ocr) +
+
+ + + +
+
+ +
+

+ Features +

+
+
+
+
+ +
+ Retrieving Data from Images with .jpg, .jpeg, .png Format +
+
+
+ +
+ Retrieving Data from .pdf Format +
+
+ +
+ +
+ Can Map Data into Contact Module +
+
+
+ +
+ Can Map Data into Employee and Expense Module +
+
+ +
+ +
+ Can Map Data into Vendor Bill Module +
+
+ +
+ +
+ Can Map Data into Sales and Purchase Module +
+
+
+
+ + + +
+
+ +
+

+ Screenshots +

+
+
+
+ +
+

+ OCR Data Retrieval Menu Option

+

+ Menu that directs to the data retrieval.

+ +
+
+

+ Data Retrieval Template

+

+ Form to read file and retrieve data

+ +
+
+

+ OCR Data Retrieval from Bill

+

+ Reading content from scanned bill and mapping into Vendor + Bill

+ +
+
+

+ OCR Data Retrieval from Personal Cards

+

+ Reading content from personal documents and mapping into + Contact module

+ +
+
+

+ OCR Data Mapping into Employee Module

+

+ Reading content from personal documents and mapping into + Employee module

+ +
+
+

+ OCR Data Mapping into Purchase Order

+

+ Reading content from PDF file and mapping into Purchase + Order

+ +
+
+

+ OCR Data Mapping into Expense Module

+

+ Reading content from bills and mapping into Expense module

+ +
+
+

+ (The example format for sale order PDF and purchase order PDF + is added in the + module.)

+
+
+
+ + + +
+
+ +
+

+ Suggested + Products +

+
+
+
+ +
+
+ + + +
+
+ +
+

+ Our Services +

+
+ +
+
+
+
+ +
+
+ Odoo + Customization
+
+ +
+
+ +
+
+ Odoo + Implementation
+
+ +
+
+ +
+
+ Odoo + Support
+
+ + +
+
+ +
+
+ Hire + Odoo + Developer
+
+ +
+
+ +
+
+ Odoo + Integration
+
+ +
+
+ +
+
+ Odoo + Migration
+
+ + +
+
+ +
+
+ Odoo + Consultancy
+
+ +
+
+ +
+
+ Odoo + Implementation
+
+ +
+
+ +
+
+ Odoo + Licensing Consultancy
+
+
+
+ + + +
+
+ +
+

+ Our + Industries +

+
+ +
+
+
+
+ +
+ Trading +
+

+ Easily procure + and + sell your products

+
+
+ +
+
+ +
+ POS +
+

+ Easy + configuration + and convivial experience

+
+
+ +
+
+ +
+ Education +
+

+ A platform for + educational management

+
+
+ +
+
+ +
+ Manufacturing +
+

+ Plan, track and + schedule your operations

+
+
+ +
+
+ +
+ E-commerce & Website +
+

+ Mobile + friendly, + awe-inspiring product pages

+
+
+ +
+
+ +
+ Service Management +
+

+ Keep track of + services and invoice

+
+
+ +
+
+ +
+ Restaurant +
+

+ Run your bar or + restaurant methodically

+
+
+ +
+
+ +
+ Hotel Management +
+

+ An + all-inclusive + hotel management application

+
+
+
+
+ + + +
+
+ +
+

+ Support +

+
+
+
+
+
+
+ +
+
+

Need Help?

+

Got questions or need help? + Get in touch.

+ +

+ odoo@cybrosys.com

+
+
+
+
+
+
+
+ +
+
+

WhatsApp

+

Say hi to us on WhatsApp!

+ +

+ +91 86068 + 27707

+
+
+
+
+
+
+
+ +
+
+
+ \ No newline at end of file diff --git a/ocr_data_retrieval/static/src/img/pdf.png b/ocr_data_retrieval/static/src/img/pdf.png new file mode 100644 index 000000000..01b150888 Binary files /dev/null and b/ocr_data_retrieval/static/src/img/pdf.png differ diff --git a/ocr_data_retrieval/static/src/js/image_field.js b/ocr_data_retrieval/static/src/js/image_field.js new file mode 100644 index 000000000..b11fdad46 --- /dev/null +++ b/ocr_data_retrieval/static/src/js/image_field.js @@ -0,0 +1,53 @@ +/** @odoo-module */ +import BasicFields from 'web.basic_fields'; +var FieldBinaryImage = BasicFields.FieldBinaryImage; +var core = require('web.core'); +var qweb = core.qweb; +var utils = require('web.utils'); + +FieldBinaryImage.include({ + _render: function () { + var self = this; + var url = this.placeholder; + if (this.recordData.file_name && this.recordData.file_name.endsWith('.pdf')){ + url = `/ocr_data_retrieval/static/src/img/pdf.png` + } + if (this.value) { + if (!utils.is_bin_size(this.value)) { + // Use magic-word technique for detecting image type + url = 'data:image/' + (this.file_type_magic_word[this.value[0]] || 'png') + ';base64,' + this.value; + } else { + var field = this.nodeOptions.preview_image || this.name; + var unique = this.recordData.__last_update; + url = this._getImageUrl(this.model, this.res_id, field, unique); + } + } + var $img = $(qweb.render("FieldBinaryImage-img", {widget: this, url: url})); + // override css size attributes (could have been defined in css files) + // if specified on the widget + var width = this.nodeOptions.size ? this.nodeOptions.size[0] : this.attrs.width; + var height = this.nodeOptions.size ? 
this.nodeOptions.size[1] : this.attrs.height; + if (width) { + $img.attr('width', width); + $img.css('max-width', width + 'px'); + if (!height) { + $img.css('height', 'auto'); + $img.css('max-height', '100%'); + } + } + if (height) { + $img.attr('height', height); + $img.css('max-height', height + 'px'); + if (!width) { + $img.css('width', 'auto'); + $img.css('max-width', '100%'); + } + } + this.$('> img').remove(); + this.$el.prepend($img); + $img.one('error', function () { + $img.attr('src', self.placeholder); + self.displayNotification({ message: _t("Could not display the selected image"), type: 'danger' }); + }); + }, +}); diff --git a/ocr_data_retrieval/views/ocr_data_template_views.xml b/ocr_data_retrieval/views/ocr_data_template_views.xml new file mode 100644 index 000000000..419afdfc6 --- /dev/null +++ b/ocr_data_retrieval/views/ocr_data_template_views.xml @@ -0,0 +1,80 @@ + + + + + ocr.data.template.view.tree + ocr.data.template + + + + + + + + + ocr.data.template.view.form + ocr.data.template + +
+ +
+
+
+ + + + + +
+
+
+
+
+ + + + +
+
+ + + + + + + + +