You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

395 lines
19 KiB

# -*- coding: utf-8 -*-
################################################################################
#
# Cybrosys Technologies Pvt. Ltd.
#
# Copyright (C) 2023-TODAY Cybrosys Technologies(<https://www.cybrosys.com>).
# Author: Sruthi Renjith (odoo@cybrosys.com)
#
# You can modify it under the terms of the GNU AFFERO
# GENERAL PUBLIC LICENSE (AGPL v3), Version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU AFFERO GENERAL PUBLIC LICENSE (AGPL v3) for more details.
#
# You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
# (AGPL v3) along with this program.
# If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
import io
import pytesseract
import re
from PIL import Image
from odoo import Command, fields, models, _
from odoo.exceptions import ValidationError
class DigitizeBill(models.TransientModel):
""" To read documents and to convert into vendor bills """
_name = "digitize.bill"
_description = "Digitize Bill"
bill = fields.Binary(string="Document", required=True,
help="Choose a scanned document")
file_name = fields.Char(string="File Name", help="Name of the file")
def record_lines(self, product_lines_newline_name_qty_price_amount,
product_lines_name_qty_amount,
product_lines_qty_name_amount,
product_lines_name_qty_price_amount1,
product_lines_name_price_qty_amount_with_dollar,
product_lines_name_price_qty_amount,
product_lines_name_amount,
product_lines_name_qty_price_amount2,
product_lines_double_line_name,
product_lines_code_name,
product_lines_quantity_part,
product_lines_vendor_bill_pattern,
product_lines_quantity_price_amount_pattern,
product_lines_quantity_price_amount_pattern2,
product_line_name_with_extra_line):
""" Function to extract bill data from text read by pytesseract """
products = []
price = []
subtotal = []
quantity = []
if (product_lines_newline_name_qty_price_amount or
product_lines_name_qty_price_amount1 or
product_lines_name_qty_price_amount2):
if product_lines_name_qty_price_amount1:
product_lines_newline_name_qty_price_amount = (
product_lines_name_qty_price_amount1)
if product_lines_name_qty_price_amount2:
product_lines_newline_name_qty_price_amount = (
product_lines_name_qty_price_amount2)
# Fetching products and create new ones
products = [self.env['product.product'].search(
[('name', '=', line[0])], limit=1) or
self.env['product.product'].create(
{'name': line[0]})
for line in
product_lines_newline_name_qty_price_amount]
# Creating lists for prices and subtotals
price = [line[2] for line in
product_lines_newline_name_qty_price_amount]
subtotal = [line[3] for line in
product_lines_newline_name_qty_price_amount]
elif product_lines_name_qty_amount:
# Fetching products and create new ones
products = [
self.env['product.product'].search([('name', '=', line[0])],
limit=1) or
self.env['product.product'].create({'name': line[0]})
for line in product_lines_name_qty_amount]
# Calculating unit prices
price = [
float(line[2]) / float(line[1]) if float(line[1]) != 0 else 0
for line in product_lines_name_qty_amount]
# Creating a list for subtotals
subtotal = [line[2] for line in product_lines_name_qty_amount]
elif product_lines_qty_name_amount:
# Fetching products and create new ones
products = [
self.env['product.product'].search([('name', '=', line[1])],
limit=1) or
self.env['product.product'].create({'name': line[1]})
for line in product_lines_qty_name_amount]
# Calculating unit prices
price = [
float(line[2]) / float(line[0]) if float(line[0]) != 0 else 0
for line in product_lines_qty_name_amount]
# Creating a list for subtotals
subtotal = [line[2] for line in product_lines_qty_name_amount]
elif (product_lines_name_price_qty_amount_with_dollar or
product_lines_name_price_qty_amount):
if product_lines_name_price_qty_amount:
product_lines_name_price_qty_amount_with_dollar = (
product_lines_name_price_qty_amount)
# Fetching products and create new ones
products = [
self.env['product.product'].search([('name', '=', line[0])],
limit=1) or
self.env['product.product'].create({'name': line[0]})
for line in product_lines_name_price_qty_amount_with_dollar]
# Extracting item prices and amounts
price = [line[1].replace('$', '') if '$' in line[1] else line[1]
for line in
product_lines_name_price_qty_amount_with_dollar]
subtotal = [line[3].replace('$', '') if '$' in line[3] else line[3]
for line in
product_lines_name_price_qty_amount_with_dollar]
elif product_lines_name_amount:
# Fetching products and create new ones
products = [
self.env['product.product'].search([('name', '=', line[0])],
limit=1) or
self.env['product.product'].create({'name': line[0]})
for line in product_lines_name_amount]
# Extracting item amounts and create a list for subtotals
subtotal = [line[1].replace('$', '') if '$' in line[1] else line[1]
for line in product_lines_name_amount]
elif product_lines_code_name:
name_list_one = []
if product_line_name_with_extra_line:
for code, name in product_line_name_with_extra_line:
product_info = f"{code} {name}"
name_list_one.append(product_info)
names = [item.strip() for item in
product_lines_code_name[0].split('\n') if item.strip()]
if name_list_one and len(name_list_one) > len(names):
names = name_list_one
if product_lines_quantity_part:
order_lines = [line.strip() for line in
product_lines_quantity_part[0].split('\n') if
line.strip()]
elif product_lines_quantity_price_amount_pattern:
order_lines = [line.strip() for line in
product_lines_quantity_price_amount_pattern[
0].split('\n') if
line.strip()]
elif product_lines_quantity_price_amount_pattern2:
order_lines = [line.strip() for line in
product_lines_quantity_price_amount_pattern2[
0].split('\n') if
line.strip()]
else:
order_lines = []
# Create the 'products' list
products = [
self.env['product.product'].search([('name', '=', name)],
limit=1) or
self.env['product.product'].create({'name': name})
for name in names]
# Creating the unit price list
price = [float(line.split()[1].replace(',', '')) if line else 0 for
line in order_lines]
# Calculating and Creating the 'subtotal' list
subtotal = [float(line.split()[1].replace(',', '')) * float(
line.split()[0]) if line else 0 for line in order_lines]
# Creating the quantity list
quantity = [float(line.split()[0]) if line else 0 for line in
order_lines]
elif product_lines_vendor_bill_pattern:
products = [
self.env['product.product'].search([('name', '=', item[0])],
limit=1) or
self.env['product.product'].create({'name': item[0]})
for item in product_lines_vendor_bill_pattern]
quantity = [float(item[1]) for item in
product_lines_vendor_bill_pattern]
price = [float(item[2]) for item in
product_lines_vendor_bill_pattern]
subtotal = [float(item[2]) * float(item[1]) for item in
product_lines_vendor_bill_pattern]
if product_lines_double_line_name:
for line in product_lines_double_line_name:
product_name = line[0] + ' ' + line[1]
product = self.env['product.product'].search(
[('name', '=', product_name)], limit=1)
if not product:
product = self.env['product.product'].create({
'name': product_name
})
products.append(product)
if '$' in line[5]:
line[5] = line[5].replace('$', '')
subtotal.append(line[5])
item_price = line[4].replace('$', '') if '$' in line[4] else \
line[4]
price.append(item_price)
if bool(quantity):
# Looping lists to create product line values
move_line_vals = [
(0, 0, {
'product_id': product.id,
'name': 'Outside Bill',
'price_unit': price_amount,
'price_subtotal': total_amount,
'quantity': qty,
'tax_ids': [Command.set([])],
'move_id': self.id,
})
for product, price_amount, total_amount, qty in
zip(products, price, subtotal, quantity)
]
elif bool(price):
# Looping three lists to create product line values
move_line_vals = [
(0, 0, {
'product_id': product.id,
'name': 'Outside Bill',
'price_unit': price_amount,
'price_subtotal': total_amount,
'tax_ids': [Command.set([])],
'move_id': self.id,
})
for product, price_amount, total_amount in
zip(products, price, subtotal)
]
else:
move_line_vals = [
(0, 0, {
'product_id': product.id,
'name': 'Outside Bill',
'price_subtotal': total_amount,
'tax_ids': [Command.set([])],
'move_id': self.id,
})
for product, total_amount in zip(products, subtotal)
]
return move_line_vals
def create_record(self, text):
""" Function to create vendor bill """
# Different patterns or regular expressions to identify the data
# from the text
newline_name_qty_price_amount = \
r'\n([A-Za-z ]+) (\d+) (\d+\.\d{2}) (\d+\.\d{2})'
name_qty_amount = r'\n([A-Za-z ]+) (\d+) (\d+\.\d{2})'
qty_name_amount = r'\n(\d+) ([A-Za-z ]+) (\d+\.\d{2})'
name_qty_price_amount1 = r'\s*([A-Za-z() \d]+) (\d+) (\d+) (\d+)'
name_price_qty_amount_with_dollar = \
r'\n(\$?\w+(?: \w+)*) (\$[\d.]+) (\d+) (\$[\d.]+)'
name_price_qty_amount = \
r'\n(\$?\w+(?: \w+)*) (\[\d.]+) (\d+) (\[\d.]+)'
name_amount = r'\n(\$?\w+(?: \w+)*) (\d+\.\d{2})'
name_qty_price_amount2 = r"([\w\s]+)\s+(\d+)\s+(\d+)\s+\$(\d+)"
double_line_name = \
r'([\w\s]+)\s([\w\s]+)\s*([\w\s]+)\s*(\d+)\s*(\$\d+' \
r'\.\d{2})\s*(\d+\.\d{2})'
code_name = r'Description\n([\s\S]*?)(?=\n\n)'
quantity_price_pattern = r'Quantity Unit Price Taxes\n([\s\S]*?)(?=\n\n)'
vendor_bill_pattern = r'\[[A-Z0-9-]+\] (.+?) (\d+\.\d+) (\d+\.\d+) (\d+\.\d+%?) \$ (\d+\.\d+)'
quantity_price_amount_pattern = r'Quantity Unit Price Taxes Amount\n((?:\d+\.\d+\s+\d+\.\d+\s+\d+\.\d+% \$\s+\d+\.\d+\n)+)'
quantity_price_amount_pattern2 = r'Quantity Unit Price Taxes Amount([\s\S]*?)(?:Untaxed Amount|Tax|Total|$)'
name_with_extra_line = r'\[([A-Z0-9-]+)\] (.+)'
# Pattern to match date and bill number from the text
date_pattern = r'\d{1,2}/\d{1,2}/\d{2,4}'
bill_no_pattern = r'Bill\sNo\.: (\d{4})'
# Pattern to match the year format of date
# (two digit or four digit format)
year_pattern = re.compile(r'\d{2}')
# Matching each pattern with the text and fetching the matching
# data from it
try:
product_lines_newline_name_qty_price_amount = re.findall(
newline_name_qty_price_amount, text)
product_lines_name_qty_amount = re.findall(name_qty_amount, text)
product_lines_qty_name_amount = re.findall(qty_name_amount, text)
product_lines_name_qty_price_amount1 = re.findall(
name_qty_price_amount1, text)
product_lines_name_price_qty_amount_with_dollar = re.findall(
name_price_qty_amount_with_dollar,
text)
product_lines_name_price_qty_amount = re.findall(
name_price_qty_amount, text)
product_lines_name_amount = re.findall(name_amount, text)
product_lines_name_qty_price_amount2 = re.findall(
name_qty_price_amount2, text)
product_lines_double_line_name = re.findall(double_line_name, text)
date_match = re.search(date_pattern, text)
bill_no_match = re.search(bill_no_pattern, text)
product_lines_code_name = re.findall(code_name, text)
product_lines_quantity_part = re.findall(
quantity_price_pattern, text)
product_lines_vendor_bill_pattern = re.findall(
vendor_bill_pattern, text)
product_lines_quantity_price_amount_pattern = re.findall(
quantity_price_amount_pattern, text)
product_lines_quantity_price_amount_pattern2 = re.findall(
quantity_price_amount_pattern2, text)
product_line_name_with_extra_line = re.findall(
name_with_extra_line, text)
except Exception:
raise ValidationError(_("Cannot find the pattern"))
date_object = ''
if date_match:
# Reading the date value if the date pattern match any data
date_value = date_match.group()
match = year_pattern.search(date_value)
if date_value == '%d/%m/%y' or date_value == '%d/%m/%Y':
date_object = fields.datetime.strptime(
date_value, '%d/%m/%y') if match and len(
date_value.split('/')[
-1]) == 2 else fields.datetime.strptime(
date_value, '%d/%m/%Y')
elif date_value == '%m/%d/%y' or date_value == '%m/%d/%Y':
date_object = fields.datetime.strptime(
date_value, '%m/%d/%y') if match and len(
date_value.split('/')[
-1]) == 2 else fields.datetime.strptime(
date_value, '%m/%d/%Y')
date = date_object.strftime(
'%Y-%m-%d') if date_object else fields.Date.today()
else:
date = fields.Date.today()
# Fetching the bill number if te pattern matches
bill_no = bill_no_match.group(1) if bill_no_match else ''
# Calling the function to get the product lines of the bill
move_line_vals = self.record_lines(
product_lines_newline_name_qty_price_amount,
product_lines_name_qty_amount,
product_lines_qty_name_amount,
product_lines_name_qty_price_amount1,
product_lines_name_price_qty_amount_with_dollar,
product_lines_name_price_qty_amount,
product_lines_name_amount, product_lines_name_qty_price_amount2,
product_lines_double_line_name,
product_lines_code_name,
product_lines_quantity_part,
product_lines_vendor_bill_pattern,
product_lines_quantity_price_amount_pattern,
product_lines_quantity_price_amount_pattern2,
product_line_name_with_extra_line)
# After getting all the data, creating a record in the
# vendor bill
bill_record = self.env['account.move'].create([{
'move_type': 'in_invoice',
'ref': bill_no,
'date': date,
'invoice_date': date,
'invoice_line_ids': move_line_vals,
}])
return bill_record
def action_add_document(self):
""" Function that reading the file in the format .jpg, .jpeg and .png
and converting into text using OCR python package """
try:
file_attachment = self.env["ir.attachment"].search(
['|', ('res_field', '!=', False), ('res_field', '=', False),
('res_id', '=', self.id),
('res_model', '=', 'digitize.bill')],
limit=1)
file_path = file_attachment._full_path(file_attachment.store_fname)
with open(file_path, mode='rb') as file:
binary_data = file.read()
img = Image.open(io.BytesIO(binary_data))
# Resizing the image to improve the clarity
resized_img = img.resize((img.width * 2, img.height * 2),
resample=Image.BICUBIC)
except Exception:
raise ValidationError(_("Cannot identify data"))
# Converting the image into text using OCR python package
# pytesseract
try:
text = pytesseract.image_to_string(resized_img)
except Exception:
raise ValidationError(_("Data cannot read"))
# Calling the function to create vendor bill
bill_record = self.create_record(text)
# Opening the vendor bill using its id
return {
'name': "Invoice",
'type': 'ir.actions.act_window',
'view_type': 'form',
'view_mode': 'form',
'res_model': 'account.move',
'res_id': bill_record.id,
'view_id': self.env.ref('account.view_move_form').id,
'target': 'current',
}