Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

""" 

---------------------------------------------------------------------- 

Authors: Jan-Justin van Tonder 

---------------------------------------------------------------------- 

This file contains the logic for the University of Pretoria ID 

card context. It is mainly intended for demonstration purposes. 

---------------------------------------------------------------------- 

""" 

 

import re 

from id_contexts.id_context import IDContext, FieldType, LineType 

from hutts_utils.hutts_logger import logger 

 

 

class UPStudentCard(IDContext):
    """
    A class that represents an ID context for a University of Pretoria ID card.
    """
    # Titles from which the card holder's sex can be extrapolated.
    _TITLE_TO_SEX = {'Mr': 'M', 'Mrs': 'F', 'Miss': 'F'}

    def __init__(self):
        """
        Initialises the UPStudentCard object.
        """
        # Logging for debugging purposes.
        logger.debug('Initialising %s...' % type(self).__name__)
        # Specify initial list of contexts for string image_processing when populating
        # the ID information dictionary to send as output.
        match_contexts = [{
            'field': 'identity_number',
            'find': None,
            'field_type': FieldType.NUMERIC_ONLY,
            'line_type': LineType.UNTITLED_ADJACENT,
            'multi_line': False
        }, {
            'field': 'surname',
            'find': None,
            'field_type': FieldType.TEXT_ONLY,
            'to_uppercase': False,
            'line_type': LineType.UNTITLED_ADJACENT,
            'multi_line': False
        }, {
            'field': 'names',
            'find': None,
            'field_type': FieldType.TEXT_ONLY,
            'to_uppercase': True,
            'line_type': LineType.TITLED_NEWLINE,
            'multi_line': False,
        }]
        # Initialise parent
        IDContext.__init__(self, match_contexts)

    @staticmethod
    def _parse_holder_line(holder_line):
        """
        Extracts the sex, initials and surname from the line preceding the ID number.

        The line is split on single spaces and is expected to read
        '<title> <initials> <surname...>'.

        Args:
            holder_line (str): The line of text found directly above the ID number line.

        Returns:
            (dict): A dictionary with 'names' and 'surname', plus 'sex' when the title is
                one of 'Mr', 'Mrs' or 'Miss'. Empty if the line has fewer than two tokens.
        """
        tokens = holder_line.split(' ')
        if len(tokens) < 2:
            # Log error and return with what we have.
            logger.warning('Failed to extract some ID information...')
            return {}
        holder_info = {}
        # Attempt to extrapolate sex from the leading title.
        sex = UPStudentCard._TITLE_TO_SEX.get(tokens[0])
        if sex is not None:
            holder_info['sex'] = sex
        # The second token is taken as the initials; the remainder forms the surname.
        holder_info['names'] = tokens[1]
        holder_info['surname'] = ' '.join(tokens[2:])
        return holder_info

    def _dictify(self, match_contexts, id_string, barcode_data, fuzzy_min_ratio, max_multi_line):
        """
        This function is responsible for generating a dictionary object containing the relevant ID information,
        such as names, surname, ID number, etc., from a given input string containing said relevant information.
        In this particular ID context, the information is sparse and is mainly intended for demonstration purposes.

        The first line whose digits (after removing every non-digit character) start with a run of
        6 to 10 digits is treated as the ID number line; the line directly above it, if any, is parsed
        for the title, initials and surname. Scanning stops at that first matching line.

        Authors:
            Jan-Justin van Tonder

        Args:
            match_contexts (list): A list of dictionaries that contain the contextual information used in the process
                of retrieving field values from the OCR output string - not used by this ID context.
            id_string (str): A string containing some ID information. A None value is treated as an empty string.
            barcode_data (dict, Optional): A dictionary object containing information extracted from a barcode.
                A non-empty 'identity_number' entry takes precedence over the number found in id_string.
            fuzzy_min_ratio (float): The threshold ratio for a minimum, acceptable ratio of fuzziness when comparing
                two strings - not used by this ID context.
            max_multi_line (int): Specifies the maximum number of lines that is to be extracted from fields that are
                noted as running onto multiple lines - not used by this ID context.

        Returns:
            (dict): A dictionary object containing the relevant, extracted ID information.
        """
        id_info = {}
        id_number_regex = re.compile(r'[0-9]{6,10}')
        # Check if barcode data is available; .get avoids a KeyError when the key is absent.
        if barcode_data and barcode_data.get('identity_number'):
            id_info['identity_number'] = barcode_data['identity_number']
        # Split once up front so the previous line can be looked up without re-splitting.
        lines = (id_string or '').split('\n')
        for line_index, line in enumerate(lines):
            # Raw string: '\d' is an invalid escape sequence in a normal string literal.
            digits = re.sub(r'[^\d]', '', line)
            if not id_number_regex.match(digits):
                continue
            # Only use the OCR number if the barcode did not already supply one.
            if 'identity_number' not in id_info:
                # Populate id_info with the student/staff number.
                id_info['identity_number'] = digits
            # Retrieve some more information from the previous line.
            if line_index > 0:
                id_info.update(self._parse_holder_line(lines[line_index - 1]))
            break
        return id_info