o
    i                     @  sB   d dl mZ d dlmZ ddlmZ ddlmZ G dd dZdS )	    )annotations)Any   )AUTO_CONFIRM_THRESHOLD)now_isoc                   @  sT   e Zd ZdZdddZdd	d
ZdddZdddZedddZ	edddZ
dS )CanonicalServicezFBuilds canonical extraction payloads from low-level file observations.manifestdict[str, Any]text_previewstrreturnc                C  s
  | dpi  d| d}i d| d| dd| dd| dd| dd| dd	| d	d| dd
| d
d| | d
dd|d|dd d d d d ddd d d ddd d d dd| ddkdddddddg g g t dS )Nfilesoriginalfilenamedocument_iddoc_idapplicant_iddocument_typecategorysubcategory	extensionsource_type source_filer
   owner)	full_namegiven_namessurnamenormalized_namenationalityidentifiers)passport_numbernational_idpersonal_numberdates)date_of_birth
issue_dateexpiry_date	relevanceidentityTF)r)   visa_482visa_186skill_assessmentreview_statuspending_reviewverified_evidence)inferred_suggestionsmissing_evidenceprocessed_at)get_infer_source_typer   )selfr   r
   r    r6   =apps/migration_evidence_builder/services/canonical_service.pybuild_document_json   sv   






	
#$%z$CanonicalService.build_document_jsonc             
   C  s   t |dd}g }dD ]}|| v s|| v r|| q|d|d|d|d|dtt|g g g g dd	gt d
S )Nr   r   )passportdegree
employmentcontract	referencetranslationr   r   r   r   )detected_keywordspeopleorganizationsr$   occupationsziV1 extraction is metadata + text-preview based. Add OCR/NER parsers in extraction_service for production.)r   r   r   r   entitiesverification_notesr2   )r   r3   lowerappendsortedsetr   )r5   r   r
   r   hintstokenr6   r6   r7   build_entities_json8   s(   

z$CanonicalService.build_entities_jsonc                C  s   t | }t|dpd }|dv }| j||d}|tko!|}|d|d|d||s3dnd||r9g nd	g| ||t|rLd
|ddtdnd |||sSdnddgdt dS )Nr   r      .tif.tiff.webp.jpg.png.jpegr   has_textr   r   unknownmanual_review_requiredz?No text preview extracted; likely scanned image or binary file.zconfidence z.2fz >= threshold z6Validate key facts manually against original evidence.zOAuto-confirmed by confidence threshold; optional manual spot-check recommended.z9Use verified_evidence only for sponsor submission drafts.)readable_text_detected
confidencerecommended_next_steps)r   r   ocr_usedimage_qualityneeds_rotationissuesneeds_manual_reviewverifiedauto_confirmedauto_confirm_thresholdauto_confirm_reasonqualityr2   )boolstripr   r3   rE   _estimate_confidencer   r   )r5   r   r
   rT   r   is_imagerX   auto_confirmr6   r6   r7   build_quality_jsonR   s8   

z#CanonicalService.build_quality_jsondocument_jsonentities_jsonquality_jsonc                 C  s   d d|dd dddddd	d
dddddddddd |di dg p(d ddd|di dd d|di dd ddd|d d |d! d"|d# d$|d% gS )&N
z# Evidence Summary: r   rU   r   z
## PurposezDThis summary supports migration evidence preparation workflows only.zFIt does not determine visa approval and does not replace legal advice.z## Verified Evidencez+- None marked yet (manual review required).z## Inferred SuggestionszG- Review detected keywords and map facts into verified evidence fields.z## Missing EvidencezE- Confirm mandatory fields for subclass 482/186 package are complete.z## Extracted Hintsz- Keywords: z, rC   r?   Nonez
## Qualityz- Confidence: rc   rX   r   z- Readable text detected: rW   Fz## Canonical JSONz
- doc_id: r   z- category: r   z- subcategory: r   z- document_type: r   )joinr3   )r5   r   rj   rk   rl   r6   r6   r7   build_summary_markdowns   s<   "z'CanonicalService.build_summary_markdownr   c              	   C  s&   ddddddddd}| |  dS )Npdfimagetextdocument).pdfrP   rR   rQ   .txt.mdz.docz.docxrU   )r3   rE   )r   mappingr6   r6   r7   r4      s   
z#CanonicalService._infer_source_typerT   rd   floatc                 C  s$   |sdS | dv r
dS | dv rdS dS )Ng?>   .csv.jsonrw   ru   rv   g(\?rL   g{Gz?gffffff?r6   rS   r6   r6   r7   rf      s   z%CanonicalService._estimate_confidenceN)r   r	   r
   r   r   r	   )
r   r	   rj   r	   rk   r	   rl   r	   r   r   )r   r   r   r   )r   r   rT   rd   r   ry   )__name__
__module____qualname____doc__r8   rK   ri   rp   staticmethodr4   rf   r6   r6   r6   r7   r   	   s    

,

!(r   N)	
__future__r   typingr   configr   utils.time_utilsr   r   r6   r6   r6   r7   <module>   s
    