diff --git a/ocr_data_retrieval/README.rst b/ocr_data_retrieval/README.rst new file mode 100755 index 000000000..28a2c0710 --- /dev/null +++ b/ocr_data_retrieval/README.rst @@ -0,0 +1,48 @@ +.. image:: https://img.shields.io/badge/licence-LGPL--3-green.svg + :target: https://www.gnu.org/licenses/lgpl-3.0-standalone.html + :alt: License: LGPL-3 + +OCR Data Retrieval +================== +This module allows users to retrieve data from scanned documents and images. + +Configuration +============= +- www.odoo.com/documentation/16.0/setup/install.html +- Install our custom addon +- Install pdf2image, PIL, pytesseract, spacy, en_core_web_sm + +Company +------- +* `Cybrosys Techno Solutions <https://cybrosys.com/>`__ + +License +------- +GNU Lesser General Public License, Version 3 (LGPL v3). +(https://www.gnu.org/licenses/lgpl-3.0-standalone.html) + +Credits +------- +Developer: (V16) Sruthi Renjith, Contact: odoo@cybrosys.com + +Contacts +-------- +* Mail Contact : odoo@cybrosys.com +* Website : https://cybrosys.com + +Bug Tracker +----------- +Bugs are tracked on GitHub Issues. In case of trouble, please check there if your issue has already been reported. + +Maintainer +========== +.. image:: https://cybrosys.com/images/logo.png + :target: https://cybrosys.com + +This module is maintained by Cybrosys Technologies. + +For support and more information, please visit `Our Website <https://cybrosys.com/>`__ + +Further information +=================== +HTML Description: ``__ \ No newline at end of file diff --git a/ocr_data_retrieval/__init__.py b/ocr_data_retrieval/__init__.py new file mode 100644 index 000000000..262eae7bc --- /dev/null +++ b/ocr_data_retrieval/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. 
+# +# Copyright (C) 2023-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. +# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . +# +############################################################################### +from . import models diff --git a/ocr_data_retrieval/__manifest__.py b/ocr_data_retrieval/__manifest__.py new file mode 100644 index 000000000..359e15e55 --- /dev/null +++ b/ocr_data_retrieval/__manifest__.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. +# +# Copyright (C) 2023-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. +# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . +# +############################################################################### +{ + 'name': 'OCR Data Retrieval', + 'version': '16.0.1.0.0', + 'category': 'Productivity', + 'summary': 'Data retrieval from scanned documents', + 'description': """Data retrieval from scanned documents with .jpg, + .jpeg, .png and .pdf files. 
Also mapping them to appropriate models""", + 'author': "Cybrosys Techno Solutions", + 'company': 'Cybrosys Techno Solutions', + 'maintainer': 'Cybrosys Techno Solutions', + 'website': "https://www.cybrosys.com", + 'depends': ['base', 'hr_expense', 'bill_digitization', 'contacts', + 'purchase'], + 'assets': { + 'web.assets_backend': [ + '/ocr_data_retrieval/static/src/js/image_field.js', + ], + }, + 'data': ['security/ir.model.access.csv', + 'views/ocr_data_template_views.xml'], + 'external_dependencies': { + 'python': ['pdf2image', 'PIL', 'pytesseract', 'spacy', + 'en_core_web_sm'] + }, + 'images': ['static/description/banner.jpg'], + 'license': 'LGPL-3', + 'installable': True, + 'auto_install': False, + 'application': False, + } diff --git a/ocr_data_retrieval/doc/RELEASE_NOTES.md b/ocr_data_retrieval/doc/RELEASE_NOTES.md new file mode 100755 index 000000000..22a29ed32 --- /dev/null +++ b/ocr_data_retrieval/doc/RELEASE_NOTES.md @@ -0,0 +1,6 @@ +## Module + +#### 10.01.2024 +#### Version 16.0.1.0.0 +#### ADD +- Initial commit for OCR Data Retrieval diff --git a/ocr_data_retrieval/models/__init__.py b/ocr_data_retrieval/models/__init__.py new file mode 100644 index 000000000..0120bfa11 --- /dev/null +++ b/ocr_data_retrieval/models/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. +# +# Copyright (C) 2023-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. 
+# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . +# +############################################################################### +from . import ocr_data_template diff --git a/ocr_data_retrieval/models/ocr_data_template.py b/ocr_data_retrieval/models/ocr_data_template.py new file mode 100644 index 000000000..e38c82255 --- /dev/null +++ b/ocr_data_retrieval/models/ocr_data_template.py @@ -0,0 +1,507 @@ +# -*- coding: utf-8 -*- +############################################################################### +# +# Cybrosys Technologies Pvt. Ltd. +# +# Copyright (C) 2023-TODAY Cybrosys Technologies() +# Author: Sruthi Renjith (odoo@cybrosys.com) +# +# You can modify it under the terms of the GNU LESSER +# GENERAL PUBLIC LICENSE (LGPL v3), Version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU LESSER GENERAL PUBLIC LICENSE (LGPL v3) for more details. +# +# You should have received a copy of the GNU LESSER GENERAL PUBLIC LICENSE +# (LGPL v3) along with this program. +# If not, see . +# +############################################################################### +import io +import os +import pytesseract +import re +import spacy +from pdf2image import convert_from_bytes +from PIL import Image, ImageOps +from odoo import api, fields, models, _ +from odoo.exceptions import ValidationError + + +class OCRDataTemplate(models.TransientModel): + """ Class to read document and extract the text from JPG, JPEG, PNG and + PDF files. 
""" + _name = "ocr.data.template" + _description = "Data Retrieving Template" + _rec_name = "file_name" + + image = fields.Binary(string="Document", required=True, + help="Upload .jpg, .jpeg, .png or .pdf files") + file_name = fields.Char(string="Document Name", help="Name of document") + image2 = fields.Image(string="Document", + help="Uploaded document", store=True) + flag = fields.Boolean(string="Flag", default=False, + help="Flag to check document read or not") + data = fields.Text(string="Data", readonly=True, + help="Content from the document") + model_name_id = fields.Many2one( + 'ir.model', string="Model", + domain="[('model', 'in', ['res.partner', 'account.move', " + "'hr.employee', 'hr.expense', 'sale.order', " + "'purchase.order'])]", + help="Model to which the data want to map") + model_field_ids = fields.Many2many( + 'ir.model.fields', string="Fields", + domain="[('model_id', '=', model_name_id)]", + help="Fields names to map data") + + def data_segmentation(self, img): + """ + Function to do segmentation for the retrieved data after converting it + into image + """ + img = ImageOps.grayscale(img) + threshold_value = 176 + img = img.point(lambda x: 255 if x > threshold_value else 0, '1') + img_rgb = ImageOps.invert(img.convert("RGB")) + segments = [] + segment_bounds = img_rgb.getbbox() + while segment_bounds: + segment = img_rgb.crop(segment_bounds) + if segment.size[0] > 0 and segment.size[1] > 0: + segments.append(segment) + img_rgb = ImageOps.crop(img_rgb, segment_bounds) + segment_bounds = img_rgb.getbbox() + return segments + + def action_get_data(self): + """ + Function to get the files in .jpg, .jpeg, .png and .pdf formats + """ + split_tup = os.path.splitext(self.file_name) + try: + # Getting the file path from ir.attachments + file_attachment = self.env["ir.attachment"].search( + ['|', ('res_field', '!=', False), ('res_field', '=', False), + ('res_id', '=', self.id), + ('res_model', '=', 'ocr.data.template')], + limit=1) + file_path = 
file_attachment._full_path(file_attachment.store_fname) + segmented_data = [] + # Reading files in the format .jpg, .jpeg and .png + if (split_tup[1] == '.jpg' or split_tup[1] == '.jpeg' or + split_tup[1] == '.png'): + with open(file_path, mode='rb') as f: + binary_data = f.read() + img = Image.open(io.BytesIO(binary_data)) + # Calling the function to do segmentation + segmented_data = self.data_segmentation(img) + elif split_tup[1] == '.pdf': + # Reading files in the format .pdf + with open(file_path, mode='rb') as f: + pdf_data = f.read() + pages = convert_from_bytes(pdf_data) + # Making the contents in 2 or more pages into combined page + max_width = max(page.width for page in pages) + total_height = sum(page.height for page in pages) + resized_images = [] + for page in pages: + resized_page = page.resize((2400, 1800)) + resized_images.append(resized_page) + combined_image = Image.new('RGB', (max_width, total_height)) + y_offset = 0 + for resized_page in resized_images: + combined_image.paste(resized_page, (0, y_offset)) + y_offset += resized_page.height + # Calling the segmentation function + segmented_data = self.data_segmentation(combined_image) + except Exception: + self.env['ocr.data.template'].search([], order="id desc", + limit=1).unlink() + raise ValidationError(_("Cannot identify data")) + # Converting the segmented image into text using pytesseract + text = "" + for segment in segmented_data: + try: + text += pytesseract.image_to_string(segment) + "\n" + break + except Exception: + raise ValidationError(_("Data cannot be read")) + # Assigning retrieved data into text field + self.data = text + self.flag = True + + @api.onchange('model_name_id') + def onchange_model_name_id(self): + """ Function to update the Many2many field to empty """ + self.write({'model_field_ids': [(6, 0, [])]}) + + def find_person_name(self): + """ + Function to find person name from the retrieved text using 'spacy' + """ + person = '' + nlp = spacy.load("en_core_web_sm") + doc = 
nlp(self.data) + for entity in doc.ents: + if entity.label_ == "PERSON": + person = entity.text + break + return person + + def get_order_line(self, text): + """ + Function to find product lines from retrieved data using regex + """ + product_line_list = [] + quantities = [] + unit_prices = [] + product_regex = r'\[?(.+?)\]?\s*(.+)\n(?:HSN/SAC Code):\s+(\d+)' + quantity_regex = r"Quantity Unit\n([\d.\s\S]+)" + unit_price_regex = r"Amount\n([\d.\s\S]+)" + # Matching the pattern with the data + quantity_match = re.search(quantity_regex, text) + price_match = re.search(unit_price_regex, text) + if quantity_match: + quantity_unit_text = quantity_match.group(1) + # If matched finding a particular pattern for quantities + # form that group + quantities = re.findall(r"\d+\.\d+", quantity_unit_text) + if price_match: + price_unit_text = price_match.group(1) + # If matched finding a particular pattern for unit price + # form that group + unit_prices = re.findall(r"\d+\.\d+", price_unit_text) + # Finding the data that matches the pattern for products + products = re.findall(product_regex, text) + number_of_product = len(products) + number_of_qty = len(quantities) + number_of_price = len(unit_prices) + # Getting the products and its corresponding quantity and price + if number_of_product == number_of_qty == number_of_price: + for i in range(number_of_product): + product_line_list.append( + {'product': products[i], 'quantity': quantities[i], + 'price': unit_prices[i]}) + elif number_of_product == number_of_qty: + for i in range(number_of_product): + product_line_list.append( + {'product': products[i], 'quantity': quantities[i]}) + elif number_of_product == number_of_price: + for i in range(number_of_product): + product_line_list.append( + {'product': products[i], 'price': unit_prices[i]}) + elif products: + for i in range(number_of_product): + product_line_list.append({'product': products[i]}) + return product_line_list + + def action_process_data(self): + """ + Function to 
process the data after fetching it. + The fetched data are mapping into some models. + """ + phone_number = '' + email_address = '' + person = '' + phone_pattern = r'\(\d{3}\) \d{3}-\d{4}|\d{3}-\d{3}-\d{4}|\+\d{1}-\d{3}-\d{3}-\d{4}|\d{11}|P \+\d{3} \d{6}' + email_pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}' + if self.model_name_id.name == 'Contact': + # Mapping the data into Contact module by fetching person name, + # phone number and email id from data + field_value = False + non_field_count = 0 + for field in self.model_field_ids: + if field.name == 'name' or field.name == 'display_name': + person = self.find_person_name() + if not person: + raise ValidationError(_("Partner name cannot find")) + field_value = True + elif field.name == 'phone': + phone = re.findall(phone_pattern, self.data) + if phone: + phone_number = phone[0] + elif field.name == 'email': + email = re.findall(email_pattern, self.data) + if email: + email_address = email[0] + else: + non_field_count = 1 + if not field_value and non_field_count == 1: + raise ValidationError(_("No data to map into the field")) + if person: + partner = self.env['res.partner'].search( + [('name', '=', person)], limit=1) + if not partner: + # Creating record in res.partner + partner_record = self.env['res.partner'].create({ + 'name': person, + 'email': email_address, + 'phone': phone_number + }) + else: + raise ValidationError(_("Partner already exist")) + else: + raise ValidationError(_("Name field is not chosen to create" + " partner")) + if partner_record: + return { + 'name': "Partner", + 'type': 'ir.actions.act_window', + 'view_type': 'form', + 'view_mode': 'form', + 'res_model': 'res.partner', + 'res_id': partner_record.id, + 'view_id': self.env.ref('base.view_partner_form').id, + 'target': 'current', + } + elif self.model_name_id.name == 'Journal Entry': + # Mapping data into Journal Entry. 
Creating a record in vendor bill + vendor_bill_flag = False + for field in self.model_field_ids: + # Taking the file path from ir.attachment + if field.name == 'invoice_vendor_bill_id': + vendor_bill_flag = True + try: + file_attachment = self.env["ir.attachment"].search( + ['|', ('res_field', '!=', False), + ('res_field', '=', False), + ('res_id', '=', self.id), + ('res_model', '=', 'ocr.data.template')], + limit=1) + file_path = file_attachment._full_path( + file_attachment.store_fname) + with open(file_path, mode='rb') as f: + binary_data = f.read() + img = Image.open(io.BytesIO(binary_data)) + # Resizing the image to improve the clarity + resized_img = img.resize( + (img.width * 2, img.height * 2), + resample=Image.BICUBIC) + except Exception: + raise ValidationError(_("Can't create vendor bill")) + # Converting the image into text using OCR python package + # pytesseract + try: + text = pytesseract.image_to_string(resized_img) + except Exception: + raise ValidationError(_("Can't create vendor bill")) + bill = self.env['digitize.bill'] + # Calling the function to create vendor bill + # from model digitize.bill + bill_record = bill.create_record(text) + return { + 'name': "Bill", + 'type': 'ir.actions.act_window', + 'view_type': 'form', + 'view_mode': 'form', + 'res_model': 'account.move', + 'res_id': bill_record.id, + 'view_id': self.env.ref('account.view_move_form').id, + 'target': 'current', + } + if not vendor_bill_flag: + raise ValidationError(_("No data to map into the field")) + elif self.model_name_id.name == 'Employee': + # Mapping the data into Employee module by fetching person name, + # phone number and email + field_value = False + non_field_count = 0 + for field in self.model_field_ids: + if field.name == 'name' or field.name == 'display_name' or \ + field.name == 'emergency_contact': + person = self.find_person_name() + if not person: + raise ValidationError(_("Employee name cannot find")) + field_value = True + elif field.name == 'work_phone' or 
field.name == 'phone' or \ + field.name == 'emergency_phone': + phone = re.findall(phone_pattern, self.data) + if phone: + phone_number = phone[0] + elif field.name == 'private_email' or \ + field.name == 'work_email': + email = re.findall(email_pattern, self.data) + if email: + email_address = email[0] + else: + non_field_count = 1 + if not field_value and non_field_count == 1: + raise ValidationError(_("No data to map into the field")) + if person: + partner = self.env['hr.employee'].search( + [('name', '=', person)], limit=1) + if not partner: + # Creating a record in hr.employee by mapping the + # data into employee name, work phone and work email + employee_record = self.env['hr.employee'].create({ + 'name': person, + 'work_email': email_address, + 'work_phone': phone_number + }) + else: + raise ValidationError(_("Employee already exist")) + else: + raise ValidationError( + _("Name field is not chosen to create employee")) + if employee_record: + return { + 'name': "Employee", + 'type': 'ir.actions.act_window', + 'view_type': 'form', + 'view_mode': 'form', + 'res_model': 'hr.employee', + 'res_id': employee_record.id, + 'view_id': self.env.ref('hr.view_employee_form').id, + 'target': 'current', + } + elif self.model_name_id.name == 'Expense': + # Mapping the data into Expense module + expense_product = False + for field in self.model_field_ids: + if field.name == 'name' or field.name == 'product_id': + expense_product = True + product = self.env['product.product'].search( + [('name', '=', 'BILL EXPENSE')], limit=1) + if not product: + product = self.env['product.product'].create({ + 'name': 'BILL EXPENSE', + }) + expense_record = self.env['hr.expense'].create({ + 'product_id': product.id, + }) + return { + 'name': "Expense", + 'type': 'ir.actions.act_window', + 'view_type': 'form', + 'view_mode': 'form', + 'res_model': 'hr.expense', + 'res_id': expense_record.id, + 'view_id': self.env.ref( + 'hr_expense.hr_expense_view_form').id, + 'target': 'current', + } + if 
not expense_product: + raise ValidationError(_("Can't create an expense without " + "description or category")) + elif self.model_name_id.name == 'Sales Order': + # Mapping the data from PDF with proper format into Sale Order + sale_order = '' + partner = False + field_value = False + non_field_value = 0 + for field in self.model_field_ids: + if field.name == 'order_line': + field_value = True + person = self.find_person_name() + if person: + partner = self.env['hr.employee'].search( + [('name', '=', person)], limit=1) + if not partner: + partner = self.env['hr.employee'].create({ + 'name': person, + }) + # Calling the function to get order lines + product_line = self.get_order_line(self.data) + sale_order = self.env['sale.order'].create({ + 'partner_id': partner.id, + }) + if product_line: + for item in product_line: + if 'quantity' not in item.keys(): + item.update({'quantity': 0}) + if 'price' not in item.keys(): + item.update({'price': 0}) + product = self.env['product.product'].search( + [('name', '=', item['product'])], limit=1) + if not product: + product = self.env['product.product'].create({ + 'name': item['product'] + }) + item.update({'product': product.id}) + self.env['sale.order.line'].create({ + 'order_id': sale_order.id, + 'product_id': item['product'], + 'product_uom_qty': item['quantity'], + 'price_unit': item['price'] + }) + else: + non_field_value = 1 + if sale_order: + return { + 'name': "Sale order", + 'type': 'ir.actions.act_window', + 'view_type': 'form', + 'view_mode': 'form', + 'res_model': 'sale.order', + 'res_id': sale_order.id, + 'view_id': self.env.ref('sale.view_order_form').id, + 'target': 'current', + } + if not field_value and non_field_value == 1: + raise ValidationError(_("No data to map into the field")) + elif self.model_name_id.name == 'Purchase Order': + # Mapping the data from PDF with proper format into Purchase Order + purchase_order = '' + field_value = False + non_field_value = 0 + partner = False + for field in 
self.model_field_ids: + if field.name == 'order_line': + field_value = True + person = self.find_person_name() + if person: + partner = self.env['hr.employee'].search( + [('name', '=', person)], limit=1) + if not partner: + partner = self.env['hr.employee'].create({ + 'name': person, + }) + # Calling the function to get order lines + product_line = self.get_order_line(self.data) + purchase_order = self.env['purchase.order'].create({ + 'partner_id': partner.id, + }) + if product_line: + for item in product_line: + if 'quantity' not in item.keys(): + item.update({'quantity': 0}) + if 'price' not in item.keys(): + item.update({'price': 0}) + product = self.env['product.product'].search( + [('name', '=', item['product'])], limit=1) + if not product: + product = self.env['product.product'].create({ + 'name': item['product'] + }) + item.update({'product': product.id}) + self.env['purchase.order.line'].create({ + 'order_id': purchase_order.id, + 'product_id': item['product'], + 'product_uom_qty': item['quantity'], + 'price_unit': item['price'] + }) + else: + non_field_value = 1 + if purchase_order: + return { + 'name': "Purchase order", + 'type': 'ir.actions.act_window', + 'view_type': 'form', + 'view_mode': 'form', + 'res_model': 'purchase.order', + 'res_id': purchase_order.id, + 'view_id': self.env.ref( + 'purchase.purchase_order_form').id, + 'target': 'current', + } + if not field_value and non_field_value == 1: + raise ValidationError(_("No data to map into the field")) + + @api.onchange('image') + def _onchange_image(self): + self.write({ + 'image2': self.image + }) diff --git a/ocr_data_retrieval/security/ir.model.access.csv b/ocr_data_retrieval/security/ir.model.access.csv new file mode 100644 index 000000000..754945175 --- /dev/null +++ b/ocr_data_retrieval/security/ir.model.access.csv @@ -0,0 +1,2 @@ +id,name,model_id/id,group_id/id,perm_read,perm_write,perm_create,perm_unlink 
+access_ocr_data_template_user,access.ocr.data.template.user,model_ocr_data_template,base.group_user,1,1,1,1 diff --git a/ocr_data_retrieval/static/description/assets/icons/cogs.png b/ocr_data_retrieval/static/description/assets/icons/cogs.png new file mode 100755 index 000000000..95d0bad62 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/cogs.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/consultation.png b/ocr_data_retrieval/static/description/assets/icons/consultation.png new file mode 100755 index 000000000..8319d4baa Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/consultation.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/ecom-black.png b/ocr_data_retrieval/static/description/assets/icons/ecom-black.png new file mode 100755 index 000000000..a9385ff13 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/ecom-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/education-black.png b/ocr_data_retrieval/static/description/assets/icons/education-black.png new file mode 100755 index 000000000..3eb09b27b Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/education-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/hotel-black.png b/ocr_data_retrieval/static/description/assets/icons/hotel-black.png new file mode 100755 index 000000000..130f613be Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/hotel-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/license.png b/ocr_data_retrieval/static/description/assets/icons/license.png new file mode 100755 index 000000000..a5869797e Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/license.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/lifebuoy.png 
b/ocr_data_retrieval/static/description/assets/icons/lifebuoy.png new file mode 100755 index 000000000..658d56ccc Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/lifebuoy.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/manufacturing-black.png b/ocr_data_retrieval/static/description/assets/icons/manufacturing-black.png new file mode 100755 index 000000000..697eb0e9f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/manufacturing-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/pos-black.png b/ocr_data_retrieval/static/description/assets/icons/pos-black.png new file mode 100755 index 000000000..97c0f90c1 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/pos-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/puzzle.png b/ocr_data_retrieval/static/description/assets/icons/puzzle.png new file mode 100755 index 000000000..65cf854e7 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/puzzle.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/restaurant-black.png b/ocr_data_retrieval/static/description/assets/icons/restaurant-black.png new file mode 100755 index 000000000..4a35eb939 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/restaurant-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/service-black.png b/ocr_data_retrieval/static/description/assets/icons/service-black.png new file mode 100755 index 000000000..301ab51cb Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/service-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/trading-black.png b/ocr_data_retrieval/static/description/assets/icons/trading-black.png new file mode 100755 index 000000000..9398ba2f1 Binary files /dev/null and 
b/ocr_data_retrieval/static/description/assets/icons/trading-black.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/training.png b/ocr_data_retrieval/static/description/assets/icons/training.png new file mode 100755 index 000000000..884ca024d Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/training.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/update.png b/ocr_data_retrieval/static/description/assets/icons/update.png new file mode 100755 index 000000000..ecbc5a01a Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/update.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/user.png b/ocr_data_retrieval/static/description/assets/icons/user.png new file mode 100755 index 000000000..6ffb23d9f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/user.png differ diff --git a/ocr_data_retrieval/static/description/assets/icons/wrench.png b/ocr_data_retrieval/static/description/assets/icons/wrench.png new file mode 100755 index 000000000..6c04dea0f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/icons/wrench.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/categories.png b/ocr_data_retrieval/static/description/assets/misc/categories.png new file mode 100755 index 000000000..bedf1e0b1 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/categories.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/check-box.png b/ocr_data_retrieval/static/description/assets/misc/check-box.png new file mode 100755 index 000000000..42caf24b9 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/check-box.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/compass.png b/ocr_data_retrieval/static/description/assets/misc/compass.png new file mode 100755 index 000000000..d5fed8faa Binary 
files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/compass.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/config.png b/ocr_data_retrieval/static/description/assets/misc/config.png new file mode 100755 index 000000000..71915e76c Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/config.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/corporate.png b/ocr_data_retrieval/static/description/assets/misc/corporate.png new file mode 100755 index 000000000..2eb13edbf Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/corporate.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/customer-support.png b/ocr_data_retrieval/static/description/assets/misc/customer-support.png new file mode 100755 index 000000000..79efc72ed Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/customer-support.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/features.png b/ocr_data_retrieval/static/description/assets/misc/features.png new file mode 100755 index 000000000..b41769f77 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/features.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/logo.png b/ocr_data_retrieval/static/description/assets/misc/logo.png new file mode 100644 index 000000000..cc3cc0ccf Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/logo.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/pictures.png b/ocr_data_retrieval/static/description/assets/misc/pictures.png new file mode 100755 index 000000000..56d255fe9 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/pictures.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/pie-chart.png b/ocr_data_retrieval/static/description/assets/misc/pie-chart.png new file mode 100755 index 
000000000..426e05244 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/pie-chart.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/right-arrow.png b/ocr_data_retrieval/static/description/assets/misc/right-arrow.png new file mode 100755 index 000000000..730984a06 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/right-arrow.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/star.png b/ocr_data_retrieval/static/description/assets/misc/star.png new file mode 100755 index 000000000..2eb9ab29f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/star.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/support.png b/ocr_data_retrieval/static/description/assets/misc/support.png new file mode 100755 index 000000000..4f18b8b82 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/support.png differ diff --git a/ocr_data_retrieval/static/description/assets/misc/whatsapp.png b/ocr_data_retrieval/static/description/assets/misc/whatsapp.png new file mode 100755 index 000000000..d513a5356 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/misc/whatsapp.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/module01.png b/ocr_data_retrieval/static/description/assets/modules/module01.png new file mode 100644 index 000000000..fa1717f3b Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/module01.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/module02.png b/ocr_data_retrieval/static/description/assets/modules/module02.png new file mode 100644 index 000000000..4a8a93830 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/module02.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/module03.png 
b/ocr_data_retrieval/static/description/assets/modules/module03.png new file mode 100644 index 000000000..f5c24146b Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/module03.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/module04.png b/ocr_data_retrieval/static/description/assets/modules/module04.png new file mode 100644 index 000000000..1b74092f0 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/module04.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/module05.png b/ocr_data_retrieval/static/description/assets/modules/module05.png new file mode 100644 index 000000000..7ea58654d Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/module05.png differ diff --git a/ocr_data_retrieval/static/description/assets/modules/module06.png b/ocr_data_retrieval/static/description/assets/modules/module06.png new file mode 100644 index 000000000..c247ba6ac Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/modules/module06.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/contact_mapping.png b/ocr_data_retrieval/static/description/assets/screenshots/contact_mapping.png new file mode 100644 index 000000000..d260f2b96 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/contact_mapping.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/data_retrieval.png b/ocr_data_retrieval/static/description/assets/screenshots/data_retrieval.png new file mode 100644 index 000000000..090e3f194 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/data_retrieval.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/employee_mapping.png b/ocr_data_retrieval/static/description/assets/screenshots/employee_mapping.png new file mode 100644 index 000000000..dcec82549 Binary files 
/dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/employee_mapping.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/hero.gif b/ocr_data_retrieval/static/description/assets/screenshots/hero.gif new file mode 100644 index 000000000..f029ba8e2 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/hero.gif differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/ocr_menu.png b/ocr_data_retrieval/static/description/assets/screenshots/ocr_menu.png new file mode 100644 index 000000000..d322625ee Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/ocr_menu.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/purchase_order.pdf b/ocr_data_retrieval/static/description/assets/screenshots/purchase_order.pdf new file mode 100644 index 000000000..96096d802 Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/purchase_order.pdf differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/read_bill.png b/ocr_data_retrieval/static/description/assets/screenshots/read_bill.png new file mode 100644 index 000000000..96f7d719b Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/read_bill.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/sale_mapping.png b/ocr_data_retrieval/static/description/assets/screenshots/sale_mapping.png new file mode 100644 index 000000000..29b65fa1f Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/sale_mapping.png differ diff --git a/ocr_data_retrieval/static/description/assets/screenshots/sale_order.pdf b/ocr_data_retrieval/static/description/assets/screenshots/sale_order.pdf new file mode 100644 index 000000000..7fd5ad1df Binary files /dev/null and b/ocr_data_retrieval/static/description/assets/screenshots/sale_order.pdf differ diff --git 
a/ocr_data_retrieval/static/description/banner.jpg b/ocr_data_retrieval/static/description/banner.jpg new file mode 100644 index 000000000..edd1d48cf Binary files /dev/null and b/ocr_data_retrieval/static/description/banner.jpg differ diff --git a/ocr_data_retrieval/static/description/icon.png b/ocr_data_retrieval/static/description/icon.png new file mode 100644 index 000000000..af7de025d Binary files /dev/null and b/ocr_data_retrieval/static/description/icon.png differ diff --git a/ocr_data_retrieval/static/description/index.html b/ocr_data_retrieval/static/description/index.html new file mode 100755 index 000000000..0a6a1898a --- /dev/null +++ b/ocr_data_retrieval/static/description/index.html @@ -0,0 +1,704 @@ +
+ +
+ +
+
+ Community +
+
+
+ + + +

+ OCR Data Retrieval

+

+ Retrieving the Contents from Images and PDF Files.

+ + + +
+ + +
+
+ +
+

+ Explore This + Module

+
+ + + + +
+
+ +
+

+ Overview +

+
+
+
+ OCR data retrieval module allows users to read data from images and PDF + files. + The module makes use of OCR (Optical Character Recognition), which enables + the extraction of text from scanned documents. +
+
+ + + +
+
+ +
+

+ Configuration +

+
+
+
+ Need to install the following Python packages: + 1. pdf2image (pip3 install pdf2image). + 2. Pillow, imported as PIL (pip3 install Pillow). + 3. pytesseract (pip3 install pytesseract). + 4. spacy (pip3 install spacy). + 5. en_core_web_sm (python3 -m spacy download en_core_web_sm). +
+
+ + + +
+
+ +
+

+ Features +

+
+
+
+
+ +
+ Retrieving Data from Images with .jpg, .jpeg, .png Format. +
+
+
+ +
+ Retrieving Data from .pdf Format. +
+
+ +
+ +
+ Can Map Data into Contact Module. +
+
+
+ +
+ Can Map Data into Employee and Expense Module. +
+
+ +
+ +
+ Can Map Data into Vendor Bill Module. +
+
+
+ +
+ Can Map Data into Sales and Purchase Module. +
+
+
+
+ + + +
+
+ +
+

+ Screenshots +

+
+
+
+ +
+

+ OCR Data Retrieval Menu Option

+

+ Menu that directs to the data retrieval.

+ +
+
+

+ Data Retrieval Template

+

+ Form to read file and retrieve data.

+ +
+
+

+ OCR Data Retrieval from Bill

+

+ Reading content from scanned bill and mapping into Vendor + Bill.

+ +
+
+

+ OCR Data Retrieval from Personal Cards

+

+ Reading content from personal documents and mapping into + Contact module.

+ +
+
+

+ OCR Data Mapping into Employee Module

+

+ Reading content from personal documents and mapping into + Employee module.

+ +
+
+

+ OCR Data Mapping into Sale Order

+

+ Reading content from PDF file and mapping into Sale Order.

+ +
+
+

+ (The example format for sale order PDF and purchase order PDF + is added in the module.)

+
+
+
+ + + +
+
+

Suggested Products

+
+ + +
+
+ + + +
+
+ +
+

+ Our Services +

+
+
+
+
+
+ +
+
+ Odoo + Customization
+
+
+
+ +
+
+ Odoo + Implementation
+
+
+
+ +
+
+ Odoo + Support
+
+
+
+ +
+
+ Hire + Odoo + Developer
+
+
+
+ +
+
+ Odoo + Integration
+
+
+
+ +
+
+ Odoo + Migration
+
+
+
+ +
+
+ Odoo + Consultancy
+
+
+
+ +
+
+ Odoo + Implementation
+
+
+
+ +
+
+ Odoo + Licensing Consultancy
+
+
+
+ + + +
+
+ +
+

+ Our + Industries +

+
+
+
+
+
+ +
+ Trading +
+

+ Easily procure + and + sell your products

+
+
+
+
+ +
+ POS +
+

+ Easy + configuration + and convivial experience.

+
+
+
+
+ +
+ Education +
+

+ A platform for + educational management.

+
+
+
+
+ +
+ Manufacturing +
+

+ Plan, track and + schedule your operations.

+
+
+
+
+ +
+ E-commerce & Website +
+

+ Mobile + friendly, + awe-inspiring product pages.

+
+
+
+
+ +
+ Service Management +
+

+ Keep track of + services and invoices.

+
+
+
+
+ +
+ Restaurant +
+

+ Run your bar or + restaurant methodically.

+
+
+ +
+
+ +
+ Hotel Management +
+

+ An + all-inclusive + hotel management application.

+
+
+
+
+ + + +
+
+ +
+

+ Support +

+
+
+
+
+
+
+ +
+
+

Need Help?

+

Got questions or need help? + Get in touch.

+ +

+ odoo@cybrosys.com

+
+
+
+
+
+
+
+ +
+
+

WhatsApp

+

Say hi to us on WhatsApp!

+ +

+ +91 86068 + 27707

+
+
+
+
+
+
+
+ +
+
+
+ \ No newline at end of file diff --git a/ocr_data_retrieval/static/src/img/pdf.png b/ocr_data_retrieval/static/src/img/pdf.png new file mode 100644 index 000000000..01b150888 Binary files /dev/null and b/ocr_data_retrieval/static/src/img/pdf.png differ diff --git a/ocr_data_retrieval/static/src/js/image_field.js b/ocr_data_retrieval/static/src/js/image_field.js new file mode 100644 index 000000000..782029d46 --- /dev/null +++ b/ocr_data_retrieval/static/src/js/image_field.js @@ -0,0 +1,13 @@ +/** @odoo-module */ + +import { patch } from "@web/core/utils/patch"; +import { ImageField } from "@web/views/fields/image/image_field"; + +patch(ImageField.prototype, "ImagefieldPatch",{ + getUrl(previewFieldName) { + if (!this.props.value){ + return `/ocr_data_retrieval/static/src/img/pdf.png` + } + return this._super(...arguments) + } +}) diff --git a/ocr_data_retrieval/views/ocr_data_template_views.xml b/ocr_data_retrieval/views/ocr_data_template_views.xml new file mode 100644 index 000000000..dd415daef --- /dev/null +++ b/ocr_data_retrieval/views/ocr_data_template_views.xml @@ -0,0 +1,80 @@ + + + + + ocr.data.template.view.tree + ocr.data.template + + + + + + + + + ocr.data.template.view.form + ocr.data.template + +
+ +
+
+
+ + + + + +
+
+
+
+
+ + + + +
+
+ + + + + + + + +