Source code for hutts_verification.id_contexts.up_student_card

"""
This file contains the logic for the University of Pretoria ID
card context. It is mainly intended for demonstration purposes.
"""

import re
from hutts_verification.id_contexts.id_context import IDContext, FieldType, LineType
from hutts_verification.utils.hutts_logger import logger

__author__ = "Jan-Justin van Tonder"
__copyright__ = "Copyright 2017, Java the Hutts"
__license__ = "BSD"
__maintainer__ = "Jan-Justin van Tonder"
__email__ = "J.vanTonder@tuks.co.za"
__status__ = "Development"


class UPStudentCard(IDContext):
    """
    A class that represents an ID context for a University of Pretoria ID card.
    """

    # Strips everything that is not a digit from an OCR line.
    _NON_DIGIT_PATTERN = re.compile(r'[^\d]')
    # A student/staff number: a run of 6 to 10 digits at the start of the
    # digits-only version of a line.
    _ID_NUMBER_PATTERN = re.compile(r'[0-9]{6,10}')

    def __init__(self):
        """
        Initialises the UPStudentCard object.
        """
        # Logging for debugging purposes.
        logger.debug('Initialising %s...' % type(self).__name__)
        # Specify initial list of contexts for string image_processing when populating
        # the ID information dictionary to send as output.
        match_contexts = [{
            'field': 'identity_number',
            'find': None,
            'field_type': FieldType.NUMERIC_ONLY,
            'line_type': LineType.UNTITLED_ADJACENT,
            'multi_line': False
        }, {
            'field': 'surname',
            'find': None,
            'field_type': FieldType.TEXT_ONLY,
            'to_uppercase': False,
            'line_type': LineType.UNTITLED_ADJACENT,
            'multi_line': False
        }, {
            'field': 'names',
            'find': None,
            'field_type': FieldType.TEXT_ONLY,
            'to_uppercase': True,
            'line_type': LineType.TITLED_NEWLINE,
            'multi_line': False,
        }]
        # Initialise parent
        IDContext.__init__(self, match_contexts)

    @staticmethod
    def _parse_holder_line(holder_line):
        """
        Splits the line that precedes the ID number into title, initials and surname.

        :param holder_line (str): The line to parse, expected to look like
            '<title> <initials> <surname...>' separated by single spaces.

        Returns:
            - (dict): The keys 'names' (the second token) and 'surname' (the remaining
                tokens joined by spaces, possibly empty), plus 'sex' when the first
                token is 'Mr' ('M') or 'Mrs'/'Miss' ('F').
            - (None): If the line holds fewer than two space-separated tokens.
        """
        tokens = holder_line.split(' ')
        if len(tokens) < 2:
            return None
        title, initials = tokens[0], tokens[1]
        details = {}
        # Attempt to extrapolate sex from the title; unknown titles yield no 'sex' entry.
        if title == 'Mr':
            details['sex'] = 'M'
        elif title in ('Mrs', 'Miss'):
            details['sex'] = 'F'
        details['names'] = initials
        # Re-combine the rest of the tokens to get the surname.
        details['surname'] = ' '.join(tokens[2:])
        return details

    def _dictify(self, match_contexts, id_string, barcode_data, fuzzy_min_ratio, max_multi_line):
        """
        This function is responsible for generating a dictionary object containing the
        relevant ID information, such as names, surname, ID number, etc., from a given
        input string containing said relevant information.

        In this particular ID context, the information is sparse and is mainly intended
        for demonstration purposes. Only the first line that contains an ID-like number
        is used, together with the line directly above it. The match_contexts,
        fuzzy_min_ratio and max_multi_line arguments are accepted but not used by this
        implementation.

        :param match_contexts (list): A list of dictionaries that contain the contextual
            information used in the process of retrieving field values from the OCR
            output string.
        :param id_string (str): A string containing some ID information.
        :param barcode_data (dict, Optional): A dictionary object containing information
            extracted from a barcode. A missing or empty 'identity_number' entry is
            ignored and the number is taken from id_string instead.
        :param fuzzy_min_ratio (float): The threshold ratio for a minimum, acceptable
            ratio of fuzziness when comparing two strings.
        :param max_multi_line (int): Specifies the maximum number of lines that is to be
            extracted from fields that are noted as running onto multiple lines.

        Returns:
            - (dict): A dictionary object containing the relevant, extracted ID
                information. May contain 'identity_number', 'sex', 'names' and 'surname'.
        """
        id_info = {}
        # Prefer the ID number from the barcode, if one is available.
        barcode_id = barcode_data.get('identity_number') if barcode_data else None
        if barcode_id:
            id_info['identity_number'] = barcode_id
        lines = id_string.split('\n')
        for line_index, line in enumerate(lines):
            digits = self._NON_DIGIT_PATTERN.sub('', line)
            if not self._ID_NUMBER_PATTERN.match(digits):
                continue
            # Populate id_info with the student/staff number, unless the barcode
            # already supplied one.
            id_info.setdefault('identity_number', digits)
            # Retrieve some more information from the previous line, if there is one.
            if line_index > 0:
                details = self._parse_holder_line(lines[line_index - 1])
                if details is None:
                    # Log error and return with what we have.
                    logger.warning('Failed to extract some ID information...')
                else:
                    id_info.update(details)
            # Only the first ID-like line is considered.
            break
        return id_info