o
    =
i                     @  s  d dl mZ d dlZd dlmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
m
Z
 d dlZd dlZd dlmZ d dlmZmZmZ d dlZd dlmZmZmZmZmZmZmZmZmZ d dlmZ d d	l m!Z!m"Z" d d
l#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ ede,ddZ-e.dZ/h dZ0dZ1dZ2eG dd dZ3ddddZ4dZ5dZ6d dlZded!d"Z7d#d$ Z8dfd%d&Z9dgd(d)Z:dZ5dZ6dhd-d.Z;d/Z<d0Z=d d1l>m?Z?m@ZA did4d5ZBe-jCd6d7 d8d9 e-jCd:d7 d;d9 e!d<fdjdBdCZDdDdE ZEdFdG ZFe!d<dHfdkdJdKZGdldMdNZHdmdPdQZIdndSdTZJe-KdUdVdW ZLe-MdXdYdZ ZNe-Kd[d\d] ZOe-Md^d_d` ZPdodbdcZQdS )p    )annotationsN)convert_from_bytes)datetime)	dataclass)DictAnyTuple)		Blueprintrequestrender_template_stringredirecturl_forflashsessionrender_templatecurrent_app)secure_filename)DecimalInvalidOperation)Session)Base)InvoiceInvoiceItem)get_sessionwp_invoicesz/wp_invoices)
url_prefix>   jpgpdfpngjpegu  Eres un verificador de facturas. Reconstruye la lógica contable, valida que las cifras cuadren y NO inventes datos. Si un dato no aparece impreso, deja su valor.verbatim = null y explica el porqué en notes.

Formato y restricciones ESTRICTAS:
- DEVUELVE ÚNICAMENTE UN JSON VÁLIDO Y MINIFICADO (una sola línea, sin markdown, sin comentarios).
- NUNCA agregues texto fuera del JSON.
- NO uses saltos de línea dentro de strings; reemplaza \n por espacios en valores de texto.
- En arrays 'evidence' usa como máximo 3 tokens cortos (<=40 caracteres) y evita frases largas.
- Evita comillas tipográficas; usa solo comillas ASCII.

Distinciones: verbatim (impreso), computed (cálculo), evidence (IDs/bboxes), confidence (0..1).
Detección tipo: 'Corporate' vs 'POS' según reglas ya especificadas.
Reglas de cálculo y GST como se definieron.
POS 2 líneas: fusiona adecuadamente header_line/pricing_line y valida qty*unit≈line_total (0.01).
Marca inconsistencias aritméticas vs extraction_error vs human_error.
Normaliza decimales con punto.

SALIDA EXACTA en este esquema (sin comentarios):{'invoice_type':{'verbatim':'POS|Corporate','confidence':0},'header':{'supplier':{'name':{'verbatim':null,'computed':null,'confidence':0,'evidence':[],'notes':''},'abn':{'verbatim':null,'computed':null,'confidence':0,'evidence':[],'notes':''}},'invoice':{'number':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'date':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'time':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'type':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]}},'payment':{'method':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'tendered_total':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]}}},'items':[{'sku':{'verbatim':null,'computed':null,'confidence':0,'evidence':[],'notes':''},'description':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'qty':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'unit_price':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'line_total':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'checks':{'qty_x_unit_eq_total':false,'tolerance':0.01}}],'extras':[{'type':'surcharge|tip|donation|shipping|discount_global|rounding|fee','label':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'amount':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'tax_type':'taxable|gst_free|unknown'}],'totals':{'items_sum':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'gst':{'verbatim':null,'computed':null,'confidence':0,'evidence':[],'notes':''},'grand_total':{'verbatim':null,'computed':null,'confidence':0,'evidence':[]},'checks':{'items_sum_matches_subtotal_or_total':false,'subtotal_plus_gst_plus_extras_equals_grand_total':false,'tendered_equals_grand_total':false}},'status':'consistent|extraction_error|human_error','notes':[]}zgpt-4.1-minic                   @  s*   e Zd ZU ded< ded< dZded< dS )ExtractResultboolokzDict[str, Any] | NonedataN
str | Noneerror)__name__
__module____qualname____annotations__r%    r*   r*   </var/www/html/flask_server/apps/wp_invoices/api/blueprint.pyr    R   s   
 r    r#   bytesreturnstrc                 C  s   t | dS )Nutf-8)base64	b64encodedecode)r#   r*   r*   r+   _b64Y   s   r3   Tvisionrawdictr!   c                 C  s   zh| pi  di  di pi }t| dtr| dpdnd}| dp&g }d| v p8d| v p8d| v }td	d
 |D }|  dpHg }t|trXtdd
 |D }ndt| v }t|pf|pf|W S  tyr   Y dS w )uE   Heurística simple: true si el JSON extraído sugiere 'includes GST'.totalsgstnotes evidenceincludes gstincl gstgst includedc                 s  @    | ]}d t | v pdt | v pdt | v V  qdS r<   r=   r>   Nr.   lower.0xr*   r*   r+   	<genexpr>j      > z(_detect_gst_inclusive.<locals>.<genexpr>c                 s  r?   r@   rA   rC   r*   r*   r+   rF   o   rG   F)get
isinstancer.   rB   anylistr!   	Exception)r5   	gst_blockr9   evid	hay_noteshay_evid	raw_noteshay_rawr*   r*   r+   _detect_gst_inclusiveb   s     $
rS   c                 C  s  d|  dpdid|  dpdidd|  dpdid|  dp!diddiddidd|  d	p1did|  d
p9didd}t }|  D ]}td|}|rZ|t|d qFg }t|D ]D}|	d|  d| dppdid|  d| dp|did|  d| dpdid|  d| dpdid|  d| dpdid qad|  dpdid|  dpdid|  dpdid}|||fS )zArma un viewmodel con la misma estructura que usa el template
    (header/items/totals con subcampos .verbatim) a partir de los valores POST.verbatimsupplier_namer:   supplier_abn)nameabninvoice_numberinvoice_date)numberdatetimetypepayment_methodtendered_total)methodr`   )supplierinvoicepaymentzitems-(\d+)-   items--sku-description-qty-unit_price-line_totalskudescriptionqty
unit_price
line_total	items_sumr8   grand_total)rr   r8   rs   )
rH   setkeysrematchaddintgroupsortedappend)form	header_vmidxskmitems_vmi	totals_vmr*   r*   r+   _build_viewmodel_from_formw   s@   
	
r   c                 C  s  |   }|drtjdd|  tjd}zt|W S  tjy%   Y nw |d}|dkr6td|dd	}d	}d}d
}t	||d
 |dD ];\}}|ra|rSd	}qH|dkrZd}qH|dkr`d	}qH|dkrhd}qH|dkrq|d7 }qH|dkr|d8 }|dkr|d } nqH|d
u r|
d}	|	dkr|	|kr|	d }ntd||||| }
zt|
W S  tjy   tdd|
}t| Y S w )u  
    Convierte 'raw' en JSON estricto:
    - quita ```json ... ``` si existe
    - recorta texto extraño
    - extrae el bloque { ... } hasta el último cierre balanceado (ignorando llaves dentro de strings)
    - intenta parsear; si hay comas colgantes, las limpia.
    z```z^```(?:json)?\s*|\s*```$r:   )flags{zNo JSON object detectedr   FN)start\T"re   }zUnbalanced JSON bracesz,(\s*[}\]])z\1)strip
startswithrv   sub
IGNORECASEjsonloadsJSONDecodeErrorfind	enumeraterfind)r5   sr   in_stresclevelcutr   chendsnippetsnippet2r*   r*   r+   coerce_json   s\   




r   	pdf_bytesc                 C  s0   t | ddd}t }|d j|dd | S )N   r   )dpifmtr   PNG)format)r   ioBytesIOsavegetvalue)r   pagesbufr*   r*   r+   _pdf_first_page_to_png   s   r   
file_bytesfilenamemimetypec                 C  s  zddl m} | }|dkrLzt| } d}W n ty3 } ztddd| dW  Y d}~W S d}~ww t| }d	d
tddd| d| dgd}nt| }d	d
tddd| d| dgd}|jjt	|gddd}t
|dd}	|	s}t| }	td|	dd  t|	}
td|
dW S  ty } z)td ztd|	dd  W n	 ty   Y nw tddt|dW  Y d}~S d}~ww )u   
    - Imágenes (png/jpg/jpeg): input_image + image_url (string con data URL base64)
    - PDFs: Files API (purpose="vision") y luego input_file con file_id
    - Sin response_format: coercion robusto a JSON con coerce_json()
    r   )OpenAIapplication/pdf	image/pngFNz No pude convertir PDF a imagen: )r"   r#   r%   user
input_text)r^   textinput_imagezdata:z;base64,)r^   	image_url)rolecontenti    g        )modelinputmax_output_tokenstemperatureoutput_textzDEBUG raw(800):i   T)r"   r#   zOpenAI extraction failedzDEBUG last raw(800):)openair   r   rL   r    r3   PROMPT_BASE	responsescreateOPENAI_MODELgetattrr   dumpsr6   printr   log	exceptionr.   )r   r   r   r   clienteb64user_messagerespr5   r#   r*   r*   r+   extract_with_openai   s^   		
r   u  
<!doctype html>
<title>WP Invoices – Subir factura</title>
<h1>Subir factura (pdf/png/jpg/jpeg)</h1>
{% with msgs = get_flashed_messages(with_categories=true) %}
  {% if msgs %}
    <ul>{% for c,m in msgs %}<li><b>{{c}}</b>: {{m}}</li>{% endfor %}</ul>
  {% endif %}
{% endwith %}
<form action="{{ url_for('wp_invoices.process') }}" method="post" enctype="multipart/form-data">
  <input type="file" name="file" accept=".pdf,.png,.jpg,.jpeg" required>
  <button type="submit">Procesar</button>
</form>
u  
<!doctype html>
<title>WP Invoices – Revisión</title>
<h1>Revisión y edición</h1>
<form action="{{ url_for('wp_invoices.save') }}" method="post">
  <fieldset>
    <legend>Header</legend>
    Proveedor (name): <input name="supplier_name" value="{{safe(header['supplier']['name']['verbatim'])}}"><br>
    Proveedor (ABN): <input name="supplier_abn" value="{{safe(header['supplier']['abn']['verbatim'])}}"><br>
    Invoice # : <input name="invoice_number" value="{{safe(header['invoice']['number']['verbatim'])}}"><br>
    Fecha : <input name="invoice_date" value="{{safe(header['invoice']['date']['verbatim'])}}"><br>
    Método pago : <input name="payment_method" value="{{safe(header['payment']['method']['verbatim'])}}"><br>
    Total pagado : <input name="tendered_total" value="{{safe(header['payment']['tendered_total']['verbatim'])}}"><br>
  </fieldset>

  <fieldset>
    <legend>Items</legend>
    <table border="1" cellpadding="4" cellspacing="0">
      <tr><th>SKU</th><th>Descripción</th><th>Qty</th><th>Unit</th><th>Total</th></tr>
      {% for i, it in enumerate(items) %}
      <tr>
        <td><input name="items-{{i}}-sku" value="{{safe(it['sku']['verbatim'])}}"></td>
        <td><input name="items-{{i}}-description" value="{{safe(it['description']['verbatim'])}}" size="60"></td>
        <td><input name="items-{{i}}-qty" value="{{safe(it['qty']['verbatim'])}}"></td>
        <td><input name="items-{{i}}-unit_price" value="{{safe(it['unit_price']['verbatim'])}}"></td>
        <td><input name="items-{{i}}-line_total" value="{{safe(it['line_total']['verbatim'])}}"></td>
      </tr>
      {% endfor %}
    </table>
  </fieldset>

  <fieldset>
    <legend>Totales</legend>
    Items sum: <input name="items_sum" value="{{safe(totals['items_sum']['verbatim'])}}"> &nbsp;
    GST: <input name="gst" value="{{safe(totals['gst']['verbatim'])}}"> &nbsp;
    Gran total: <input name="grand_total" value="{{safe(totals['grand_total']['verbatim'])}}">
  </fieldset>

  <input type="hidden" name="raw_json" value='{{ raw_json | tojson | safe }}'>
  <button type="submit">Guardar</button>
</form>
<hr>
<details style="margin-top:1rem;" open>
  <summary style="cursor:pointer;font-weight:600;">JSON extraído (raw)</summary>
  {% if raw_json %}
  <div style="display:flex;gap:.5rem;align-items:center;margin:.5rem 0;">
    <button id="copyRawBtn" type="button">Copiar JSON</button>
    <small id="copyRawMsg" style="opacity:.8;"></small>
  </div>
  <pre id="rawBox" style="max-height:350px;overflow:auto;background:#0b1020;color:#e9eefc;padding:12px;border-radius:8px;font-size:12px;line-height:1.35;">
{{ raw_json | e }}
  </pre>
  {% else %}
    <p style="opacity:.8;">No hay JSON para mostrar.</p>
  {% endif %}
</details>
<script>
(function(){
  const btn = document.getElementById('copyRawBtn');
  const box = document.getElementById('rawBox');
  const msg = document.getElementById('copyRawMsg');
  if(btn && box){
    btn.addEventListener('click', async () => {
      try{
        await navigator.clipboard.writeText(box.innerText);
        msg.textContent = 'Copiado ✓';
        setTimeout(()=> msg.textContent = '', 1500);
      }catch(e){ msg.textContent = 'No se pudo copiar'; }
    });
  }
})();
</script>
)Markupescapevalr   c                 C  s   | d u rdS t | S )Nr:   )r.   )r   r*   r*   r+   _safe  s   r   c                 C  s   t tt| S N)r   _escaper   vr*   r*   r+   <lambda>  s    r   safe)rW   c                 C  s   t | S r   )r   )seqr*   r*   r+   r     s    r   0.01aDecimal | Nonebtolr   c                 C  s<   | d u s|d u r
dS z	t | | |kW S  ty   Y dS w )NF)absrL   )r   r   r   r*   r*   r+   _approx_equal  s   r   c                 C  s    t | tr
| |S t| |dS )zCObtiene un campo desde dict o desde un objeto (modelo) por getattr.N)rI   r6   rH   r   )itfieldr*   r*   r+   _val_get  s   

r   c                 C  s>   | d u rd S t | tr| S ztt| W S  ty   Y d S w r   )rI   r   r.   rL   r   r*   r*   r+   _to_dec_or_none  s   
r   autogst_modec              	   C  s~  i i d}dt di}t| D ]\}	}
tt|
d}tt|
d}tt|
d}i }g }|du r5|d |du r>|d d}|dur_|dur_z|| t d}W n ty^   d}Y nw |durl|d  |7  < n|durx|d  |7  < |dur|durzt|| |krt|| |d	< W n ty   d
|d	< Y nw |r||d< |r||d |	< q|}|dkrd}|durt|}t	d t	| t	d t	|d  t	d t	t||d   t	d t	| |du st||d  |krz|dur||d  nd
}W n ty   d
}Y nw t||d d< |dkrk|durj|durjt|}t|}|durj|durjzt|d | | |krXt|d | | |d d< W n tyi   d
|d d< Y nw n|dkr|durt|}|durzt|d | |krt|d | |d d< W n ty   d
|d d< Y nw |dur|durz*||t d  t d}t	d t	| t|| |krt|| |d d< W n ty   d
|d d< Y nw |dur/|dur/t|}t|}|dur/|dur/zt|| |krt|| |d d< W n ty.   d
|d d< Y nw |d  o9|d  }|||fS )u   
    Acepta items como lista de dicts Ó de modelos con attrs: qty, unit_price, line_total.
    Retorna (ok: bool, errors: dict, computed: dict).
    )itemsr7   items_sum_calc0ro   rp   rq   Nr   qty_unit_total_mismatchmismatchmissing_fieldsr   r   	exclusiveitem_sum	Citem_sumdiffsr   r7   items_sum_mismatchsum_plus_gst_mismatch	inclusiveitems_sum_vs_total_mismatchz1.1impliedgst_inclusive_mismatchtendered_vs_grand_mismatch)
r   r   r   r   r|   quantizerL   r   r.   r   )r   rr   r8   rs   r`   r   r   errorscomputedr   r   ro   unitlineitem_errmissingexpectedmodediffr   r"   r*   r*   r+   _validate_items_and_totals  s   






r  pathc                 C  s   t j| dd d S )NT)exist_ok)osmakedirs)r  r*   r*   r+   _ensure_dir&  s   r  rW   c                 C  s,   |   dd}ddd |D d d S )N _r:   c                 s  s$    | ]}|  s|d v r|V  qdS ))r
  -.N)isalnum)rD   r   r*   r*   r+   rF   ,  s   " z _slugify_name.<locals>.<genexpr>x   )r   replacejoin)rW   r   r*   r*   r+   _slugify_name)  s   r  r$   c                 C  sP   d }|rd|v r| ddd  }|s | r t| pdd}|r&d| S dS )Nr  re   r   r:   )rsplitrB   	mimetypesguess_extensionlstrip)r   r   extr*   r*   r+   _ext_from_mime_or_name.  s   r  /c                   C  s   t tS r   )r   
TPL_UPLOADr*   r*   r*   r+   upload_form6  s   r  z/processc            
      C  s  t jd} | stdd ttdS |  }t| jpd}| j	p+ddddd	|d
}t
jd}t| t jt|| }tj||}t|d}|| W d    n1 s\w   Y  |||t  d dtd< d|v r~|ddd  nd }|tvrtdd ttdS t|||}	|	jrt !dt"j#|	j$ddd d  t%dt"j#|	j$dd nt &d|	j& td|	j& d ttdS |	j$td< t"j#|	j$dddtd< t"j#|	j$d dd!td"< ttd#S )$Nfilez Selecciona un archivo de facturar%   wp_invoices.upload_formrc   r   r   z
image/jpeg)r   r   r   r   zapplication/octet-streamINVOICE_STAGING_DIRwbZ)r  	orig_namer   uploaded_atwpinv_staging_filer  re   r   r:   zFormato no permitidozExtraction OK: %sF)ensure_asciii  zWP_INVOICES_EXTRACT:zExtraction ERROR: %su   Error en extracción: wpinv_extract),:)
separatorsr#  wp_inv_raw_min   indentr#  wp_inv_raw_prettyzwp_invoices.review)'r
   filesrH   r   r   r   readr   r   r   r   configr  uuiduuid4hexr  r  r  r  openwriter   utcnow	isoformatr   r  rB   r   ALLOWED_EXTSr   r"   r   infor   r   r#   r   r%   )
fr#   r   r  r   staging_dirtmpnametmppathout	extractedr*   r*   r+   process;  sR   

$
 
r?  z/reviewc                  C  sd   t d} | stdd ttdS | di }| dg }| di }t d}td	||||d
S )Nr$  z,No hay datos para revisar. Sube una factura.r%   r  headerr   r7   r,  wp_invoices/review.html)r@  r   r7   raw_json)r   rH   r   r   r   r   )r#   r@  r   r7   
raw_prettyr*   r*   r+   reviewq  s   


rD  z/savec            (      C  sj  d@dd} | d}| d}| d}| d}| d}| d	pd }| d
p$d }| dp*d }| dp0d }	t  }
tj D ]"}|dr[zt|ddd }|
| W q9 tyZ   Y q9w q9g }t	|
D ]4}t
| d| d| d| dt| d| dt| d| dt| d| dd}|| qbt }t|pd |pd |pd |pd |pd t|t|t|t|	d	}|D ]}|j| qtdpi }t|}t||||	|td|rdndd\}}}|s'tdd tdpi }ttj\}}}tdptj|dd d!}|d"i }|d#g }|d$i }td%||||t|d&d'fS |d u r0|d( }|| |  td)d }|rtj|d*drt j!d+} t"|  tj#| |j$d,}!t"|! t%|d-|d.}"t&|j'p|d.pd/|j$ }#tj#|!|# d0|" }$|$|_(d}|$}%tj)|%rtj#|!|# d1| |" }%|d7 }tj)|%st*+|d* |% tj#|!|# d2}&z3t,|&d3d4d5!}'tj-|j$|d.|d-|%|d6d7|'d dd8 W d    n	1 sw   Y  W n
 ty   Y nw || |  t./d9|d.|% td:d; nt.0d<|j$ td=|j$ d>d; t1t2d?S )ANr:   c                 S  s   t j| p| S r   )r
   r}   rH   r   )rW   defaultr*   r*   r+   g  s   zsave.<locals>.grU   rV   rY   rZ   r_   r`   rr   r8   rs   rf   r  r)  re   rg   rh   ri   rj   rk   rl   )	rU   rV   rY   rZ   r_   r`   rr   r8   rs   r$  r   r   r   )r   rr   r8   rs   r`   r   r   u?   Hay inconsistencias aritméticas. Revisa los campos resaltados.r%   r,  Fr*  r@  r   r7   rA  )r@  r   r7   rB  r   r   i  r   r"  r  INVOICE_STORE_DIR08dr   r   invoice_	_original
_original_z_original.jsonwr/   )encodingr!  )
invoice_idoriginal_namer   stored_pathr!  )r#  r+  z&Archivo de factura %s almacenado en %sz#Archivo original guardado en disco.successz/No staging file found in session for invoice %szFactura guardada (id=z).r  )r:   )3rt   r
   r}   ru   r   ry   splitrx   rL   r{   r   _to_decimalr|   r   r   r   r   rH   rS   r  r   r   r   r   r   r   r   commitpopr  r  isfiler   r/  r  r  idr  r  rY   	file_pathexistsshutilmover3  dumploggingr8  warningr   r   )(rF  rU   rV   rY   rZ   r_   r`   rr   r8   rs   r   r   idxr   r   r   sessinvr5   gst_inclusiver"   r   r   r#   r~   r   r   rC  r@  
items_datatotals_datastaging	store_dirinv_dirr  base	dest_path
final_path	meta_pathmfr*   r*   r+   r     s   






$
r   r   c              	   C  s8   | sd S z|  dd}t|W S  ttfy   Y d S w )Nr%  r  )r  r   r   AttributeError)r   s2r*   r*   r+   rS  !  s   
rS  )r#   r,   r-   r.   )r5   r6   r-   r!   )r5   r.   r-   r6   )r   r,   r-   r,   )r   r,   r   r.   r   r.   r-   r    )r   r   r-   r.   )r   r   r   r   r   r   r-   r!   )r   r   r   r.   )r  r.   )rW   r.   r-   r.   )r   r$   r   r$   r-   r.   )r   r$   r-   r   )R
__future__r   r0   	pdf2imager   r   r  r0  r  rZ  r   r   r]  dataclassesr   typingr   r   r   rv   flaskr	   r
   r   r   r   r   r   r   r   werkzeug.utilsr   decimalr   r   sqlalchemy.ormr   apps.wp_invoices.db.baser   apps.wp_invoices.modelsr   r   	config.dbr   r&   wp_invoices_bp	getLoggerr   r7  r   r   r    r3   USE_FILES_API_FOR_PDFFILES_PURPOSErS   r   r   r   r   r  
TPL_REVIEW
markupsafer   r   r   r   add_app_template_globalr   r   r   r  r  r  r  rH   r  postr?  rD  r   rS  r*   r*   r*   r+   <module>   sx    ,
0


-
C
GK


p



5

 