Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 103 additions & 48 deletions spp_programs/models/managers/deduplication_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,15 @@ def _check_duplicate_by_individual_ids(self, beneficiaries):
group_of_duplicates[group_membership.individual.id].append(group_membership.group.id)

_logger.debug("Found %s duplicate group(s)", len(group_of_duplicates))
for _individual, group_ids in group_of_duplicates.items():
for individual_id, group_ids in group_of_duplicates.items():
group_ids_set = set(group_ids)
duplicate_beneficiaries = beneficiaries.filtered(lambda rec, gids=group_ids_set: rec.partner_id.id in gids)
duplicate_beneficiariy_ids = duplicate_beneficiaries.mapped("id")

self._record_duplicate(self, duplicate_beneficiariy_ids, "Duplicate individuals")
individual_rec = self.env["res.partner"].browse(individual_id)
group_names = duplicate_beneficiaries.mapped("partner_id.name")
reason = f"Shared member: {individual_rec.name} found in {len(group_ids)} groups ({', '.join(group_names)})"
self._record_duplicate(self, duplicate_beneficiariy_ids, reason)

duplicated_enrolled = duplicate_beneficiaries.filtered(lambda rec: rec.state == "enrolled")
if len(duplicated_enrolled) == 1:
Expand All @@ -158,7 +161,11 @@ class IDDocumentDeduplication(models.Model):
_inherit = ["spp.base.deduplication.manager", "spp.manager.source.mixin"]
_description = "ID Deduplication Manager"

supported_id_document_type_ids = fields.Many2many("spp.id.type", string="Supported ID Document Types")
supported_id_document_type_ids = fields.Many2many(
"spp.vocabulary.code",
string="Supported ID Document Types",
domain=[("vocabulary_id.namespace_uri", "=", "urn:openspp:vocab:id-type")],
)

def deduplicate_beneficiaries(self, states):
for rec in self:
Expand Down Expand Up @@ -233,7 +240,7 @@ def _check_duplicate_by_group_with_individual(self, beneficiaries):
if x.id_type_id in self.supported_id_document_type_ids and (
(not x.expiry_date) or x.expiry_date > date.today()
):
id_doc_id_with_id_type_and_value = {x.id: x.id_type_id.name + "-" + x.value}
id_doc_id_with_id_type_and_value = {x.id: x.id_type_id.display + "-" + x.value}
individual_id_docs.update(id_doc_id_with_id_type_and_value)

# Check ID Docs of each group
Expand All @@ -242,7 +249,7 @@ def _check_duplicate_by_group_with_individual(self, beneficiaries):
if x.id_type_id in self.supported_id_document_type_ids and (
(not x.expiry_date) or x.expiry_date > date.today()
):
id_doc_id_with_id_type_and_value = {x.id: x.id_type_id.name + "-" + x.value}
id_doc_id_with_id_type_and_value = {x.id: x.id_type_id.display + "-" + x.value}
individual_id_docs.update(id_doc_id_with_id_type_and_value)

_logger.debug("Found %s ID document(s) to check", len(individual_id_docs))
Expand All @@ -264,6 +271,11 @@ def _check_duplicate_by_group_with_individual(self, beneficiaries):
)

_logger.debug("Found %s group(s) with duplicates", len(group_with_duplicates))
# Build mapping: individual_id -> list of duplicate ID doc descriptions
individual_dup_docs = {}
for doc in duplicated_doc_ids:
individual_dup_docs.setdefault(doc.partner_id.id, []).append(f"{doc.id_type_id.display}: {doc.value}")

group_of_duplicates = {}
for group_membership in group_with_duplicates:
_logger.debug("Processing group membership duplicate")
Expand All @@ -272,12 +284,15 @@ def _check_duplicate_by_group_with_individual(self, beneficiaries):
group_of_duplicates[group_membership.individual.id].append(group_membership.group.id)

_logger.debug("Found %s duplicate group(s)", len(group_of_duplicates))
for _individual, group_ids in group_of_duplicates.items():
for individual_id, group_ids in group_of_duplicates.items():
group_ids_set = set(group_ids)
duplicate_beneficiaries = beneficiaries.filtered(lambda rec, gids=group_ids_set: rec.partner_id.id in gids)
duplicate_beneficiariy_ids = duplicate_beneficiaries.mapped("id")

self._record_duplicate(self, duplicate_beneficiariy_ids, "Duplicate ID Documents")
individual_rec = self.env["res.partner"].browse(individual_id)
doc_info = ", ".join(individual_dup_docs.get(individual_id, []))
reason = f"Duplicate ID document ({doc_info}) on member: {individual_rec.name}"
self._record_duplicate(self, duplicate_beneficiariy_ids, reason)

duplicated_enrolled = duplicate_beneficiaries.filtered(lambda rec: rec.state == "enrolled")
if len(duplicated_enrolled) == 1:
Expand All @@ -295,55 +310,65 @@ def _check_duplicate_by_group_with_individual(self, beneficiaries):

def _check_duplicate_by_individual(self, beneficiaries):
    """
    Check for duplicate ID documents among individual beneficiaries.

    Duplicates are grouped by a shared "IDType-Value" key built from each
    valid (non-expired) supported ID document. For every group of memberships
    sharing the same document value, a duplicate record is created with a
    descriptive reason and the keep-one-enrolled rule is applied: if exactly
    one member of the group is enrolled it is preserved, otherwise all are
    marked ``duplicated`` (terminal states are never overwritten).

    :param beneficiaries: ``spp.program.membership`` recordset to check.
    :return: ``spp.program.membership`` recordset of all duplicated memberships.
    """
    _logger.debug("-" * 100)
    individual_ids = beneficiaries.mapped("partner_id.id")
    individuals = self.env["res.partner"].search([("id", "in", individual_ids)])
    _logger.debug("Checking ID Document Duplicates for %s individual(s)", len(individuals))

    # Map: reg_id.id -> "IDType-Value" key; also track reg_id.id -> partner_id.
    # Fetch all relevant ID documents in a single search instead of looping
    # over each individual's reg_ids, which caused N+1 query behavior with
    # large beneficiary sets.
    individual_id_docs = {}
    reg_id_to_partner = {}
    all_reg_ids = self.env["spp.registry.id"].search(
        [
            ("partner_id", "in", individuals.ids),
            ("id_type_id", "in", self.supported_id_document_type_ids.ids),
        ]
    )
    for doc in all_reg_ids:
        # Only documents without an expiry date, or not yet expired, count.
        if (not doc.expiry_date) or doc.expiry_date > date.today():
            doc_key = f"{doc.id_type_id.display}-{doc.value}"
            individual_id_docs[doc.id] = doc_key
            reg_id_to_partner[doc.id] = doc.partner_id.id

    _logger.debug("Found %s ID document(s) to check", len(individual_id_docs))
    # Reverse map: doc_key -> set of reg_id ids sharing that document value.
    rev_dict = {}
    for reg_id, doc_key in individual_id_docs.items():
        rev_dict.setdefault(doc_key, set()).add(reg_id)

    all_duplicated_memberships = self.env["spp.program.membership"]

    # Process each group of duplicates sharing the same ID value.
    for doc_key, reg_ids in rev_dict.items():
        if len(reg_ids) <= 1:
            continue

        partner_ids = list({reg_id_to_partner[rid] for rid in reg_ids})
        dup_memberships = self.env["spp.program.membership"].search(
            [
                ("partner_id", "in", partner_ids),
                ("program_id", "=", self.program_id.id),
            ]
        )
        if not dup_memberships:
            continue

        names = dup_memberships.mapped("partner_id.name")
        reason = f"Duplicate ID document ({doc_key}) shared by: {', '.join(names)}"
        self._record_duplicate(self, dup_memberships.ids, reason)

        # Keep-one-enrolled logic: if exactly one membership is enrolled,
        # preserve it and mark only the others; otherwise mark them all.
        duplicated_enrolled = dup_memberships.filtered(lambda rec: rec.state == "enrolled")
        if len(duplicated_enrolled) == 1:
            to_mark = dup_memberships.filtered(lambda rec: rec.state != "enrolled")
        else:
            to_mark = dup_memberships
        # Never overwrite terminal / already-flagged states.
        to_mark.filtered(lambda rec: rec.state not in ["exited", "not_eligible", "duplicated"]).write(
            {"state": "duplicated"}
        )

        all_duplicated_memberships |= dup_memberships

    return all_duplicated_memberships


class PhoneNumberDeduplication(models.Model):
Expand Down Expand Up @@ -462,6 +487,10 @@ def _check_duplicate_by_group_with_individual(self, beneficiaries):
)

_logger.debug("Found %s group(s) with duplicates", len(group_with_duplicates))
# Build mapping: individual_id -> list of duplicate phone numbers
individual_dup_phones = {}
for phone_rec in duplicate_individuals_ids:
individual_dup_phones.setdefault(phone_rec.partner_id.id, []).append(phone_rec.phone_no)

group_of_duplicates = {}
for group_membership in group_with_duplicates:
Expand All @@ -471,12 +500,15 @@ def _check_duplicate_by_group_with_individual(self, beneficiaries):
group_of_duplicates[group_membership.individual.id].append(group_membership.group.id)

_logger.debug("Found %s duplicate group(s)", len(group_of_duplicates))
for _individual, group_ids in group_of_duplicates.items():
for individual_id, group_ids in group_of_duplicates.items():
group_ids_set = set(group_ids)
duplicate_beneficiaries = beneficiaries.filtered(lambda rec, gids=group_ids_set: rec.partner_id.id in gids)
duplicate_beneficiariy_ids = duplicate_beneficiaries.mapped("id")

self._record_duplicate(self, duplicate_beneficiariy_ids, "Duplicate Phone Numbers")
individual_rec = self.env["res.partner"].browse(individual_id)
phone_info = ", ".join(individual_dup_phones.get(individual_id, []))
reason = f"Duplicate phone number ({phone_info}) on member: {individual_rec.name}"
self._record_duplicate(self, duplicate_beneficiariy_ids, reason)

duplicated_enrolled = duplicate_beneficiaries.filtered(lambda rec: rec.state == "enrolled")
if len(duplicated_enrolled) == 1:
Expand Down Expand Up @@ -524,21 +556,44 @@ def _check_duplicate_by_individual(self, beneficiaries):
individual_ids = [x.partner_id.id for x in duplicated_phone_ids]
individual_ids = list(dict.fromkeys(individual_ids))
_logger.debug("Individual IDS with Duplicated Phone Numbers: %s", individual_ids)
individual_program_membership = self.env["spp.program.membership"].search(
[
("partner_id", "in", individual_ids),
("program_id", "=", self.program_id.id),
]
)

for duplicates in individual_program_membership:
duplicate_individuals = [duplicates.id]
self._record_duplicate(self, duplicate_individuals, "Duplicate Phone Numbers")
all_duplicated_memberships = self.env["spp.program.membership"]

# Build reverse map: phone_sanitized -> list of partner_ids
phone_to_partners = {}
for phone_rec in duplicated_phone_ids:
phone_to_partners.setdefault(phone_rec.phone_sanitized, set()).add(phone_rec.partner_id.id)

for phone_val, partner_id_set in phone_to_partners.items():
if len(partner_id_set) <= 1:
continue

dup_memberships = self.env["spp.program.membership"].search(
[
("partner_id", "in", list(partner_id_set)),
("program_id", "=", self.program_id.id),
]
)
if not dup_memberships:
continue

names = dup_memberships.mapped("partner_id.name")
reason = f"Duplicate phone number ({phone_val}) shared by: {', '.join(names)}"
self._record_duplicate(self, dup_memberships.ids, reason)

# Keep-one-enrolled logic
duplicated_enrolled = dup_memberships.filtered(lambda rec: rec.state == "enrolled")
if len(duplicated_enrolled) == 1:
to_mark = dup_memberships.filtered(lambda rec: rec.state != "enrolled")
else:
to_mark = dup_memberships
to_mark.filtered(lambda rec: rec.state not in ["exited", "not_eligible", "duplicated"]).write(
{"state": "duplicated"}
)

if duplicates.state == "enrolled":
duplicates.write({"state": "duplicated"})
all_duplicated_memberships |= dup_memberships

return individual_program_membership
return all_duplicated_memberships


class IDPhoneEligibilityManager(models.Model):
Expand Down
6 changes: 4 additions & 2 deletions spp_programs/models/managers/program_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,10 @@ def _enroll_eligible_registrants(self, states, offset=0, limit=None, do_count=Fa
for el in eligibility_managers:
members = el.enroll_eligible_registrants(members)
# enroll the one not already enrolled:
# Exclude members that are duplicated or exited — those states
# should only be changed through their own workflows.
_logger.debug("members filtered: %s", members)
not_enrolled = members.filtered(lambda m: m.state != "enrolled")
not_enrolled = members.filtered(lambda m: m.state not in ("enrolled", "duplicated", "exited"))
_logger.debug("not_enrolled: %s", not_enrolled)
not_enrolled.write(
{
Expand All @@ -226,7 +228,7 @@ def _enroll_eligible_registrants(self, states, offset=0, limit=None, do_count=Fa
# dis-enroll the one not eligible anymore:
enrolled_members_ids = members.ids
members_to_remove = member_before.filtered(
lambda m: m.state != "not_eligible" and m.id not in enrolled_members_ids
lambda m: m.state not in ("not_eligible", "duplicated", "exited") and m.id not in enrolled_members_ids
)
# _logger.debug("members_to_remove: %s", members_to_remove)
members_to_remove.write(
Expand Down
52 changes: 39 additions & 13 deletions spp_programs/models/program_membership.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,23 @@ class SPPProgramMembership(models.Model):

registrant_id = fields.Integer(string="Registrant ID", related="partner_id.id")

duplicate_reason = fields.Char(
string="Duplicate Reason",
compute="_compute_duplicate_reason",
)

def _compute_duplicate_reason(self):
    """Compute the human-readable reason a membership was marked duplicated.

    For memberships in the ``duplicated`` state, look up the most recent
    active duplicate record referencing them and expose its reason; every
    other membership gets no reason (``False``).
    """
    Duplicate = self.env["spp.program.membership.duplicate"]
    for membership in self:
        # Guard clause: only duplicated memberships carry a reason.
        if membership.state != "duplicated":
            membership.duplicate_reason = False
            continue
        latest = Duplicate.search(
            [
                ("beneficiary_ids", "in", membership.id),
                ("state", "=", "duplicate"),
            ],
            order="id desc",
            limit=1,
        )
        membership.duplicate_reason = latest.reason if latest else False

@api.constrains("partner_id", "program_id")
def _check_unique_partner_per_program(self):
# Prefetch partner_id and program_id to avoid N+1 queries in loop
Expand Down Expand Up @@ -212,7 +229,13 @@ def enroll_eligible_registrants(self):
member = em.enroll_eligible_registrants(member)

if len(member) > 0:
if self.state != "enrolled":
if self.state in ("duplicated", "exited"):
message = _(
"Cannot enroll: beneficiary is currently %s.",
dict(self._fields["state"].selection).get(self.state, self.state),
)
kind = "warning"
elif self.state != "enrolled":
self.write(
{
"state": "enrolled",
Expand All @@ -221,19 +244,22 @@ def enroll_eligible_registrants(self):
)
message = _("%s Beneficiaries enrolled.", len(member))
kind = "success"
return {
"type": "ir.actions.client",
"tag": "display_notification",
"params": {
"title": _("Enrollment"),
"message": message,
"sticky": False,
"type": kind,
"next": {
"type": "ir.actions.act_window_close",
},
else:
message = _("Beneficiary is already enrolled.")
kind = "info"
return {
"type": "ir.actions.client",
"tag": "display_notification",
"params": {
"title": _("Enrollment"),
"message": message,
"sticky": False,
"type": kind,
"next": {
"type": "ir.actions.act_window_close",
},
}
},
}

else:
self.state = "not_eligible"
Expand Down
Loading
Loading