espace-paie-odentas/extract-placeholders.js
odentas 59749d481b feat: Migration Cloudinary vers Poppler pour conversion PDF→JPEG
- Remplacer Cloudinary (US) par solution 100% AWS eu-west-3
- Lambda odentas-sign-pdf-converter avec pdftoppm
- Lambda Layer poppler-utils v5 avec dépendances complètes
- Trigger S3 ObjectCreated pour conversion automatique
- Support multi-pages validé (PDF 3 pages)
- Stockage images dans S3 odentas-docs
- PDFImageViewer pour affichage images converties
- Conformité RGPD garantie (données EU uniquement)
2025-10-28 10:22:45 +01:00

127 lines
4.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* Script pour extraire les positions exactes des placeholders {{Signature...}}
* depuis un PDF
*/
const fs = require('fs');
const path = require('path');
/**
 * Matches signature placeholders embedded in the document text.
 * Format: {{Label;role=Role;type=type;height=H;width=W}}
 * Capture groups: 1 = label, 2 = role, 3 = type, 4 = height, 5 = width.
 * The `g` flag makes this regex stateful (`lastIndex`), so it must be reset
 * before each scan.
 */
const PLACEHOLDER_REGEX = /\{\{([^;]+);role=([^;]+);type=([^;]+);height=(\d+);width=(\d+)\}\}/g;

/**
 * Lists the offsets of every form feed character (\x0c) in `text`, ascending.
 *
 * @param {string} text - Text to scan.
 * @returns {number[]} Zero-based offsets of the form feeds.
 */
function findFormFeedOffsets(text) {
  const offsets = [];
  let at = text.indexOf('\x0c');
  while (at !== -1) {
    offsets.push(at);
    at = text.indexOf('\x0c', at + 1);
  }
  return offsets;
}

/**
 * Finds every placeholder in `text` and assigns it a 1-based page number.
 *
 * The page number is 1 plus the number of form feeds located before the
 * placeholder. This is a heuristic.
 * NOTE(review): confirm the analysed PDFs really contain form feeds between
 * pages; otherwise every placeholder is reported on page 1.
 *
 * @param {string} text - Raw document text.
 * @returns {Array<{page: number, label: string, role: string, type: string,
 *   height: number, width: number, fullMatch: string}>} Placeholders in
 *   document order.
 */
function findPlaceholders(text) {
  const formFeeds = findFormFeedOffsets(text);
  const found = [];
  let breaksPassed = 0;
  let match;

  PLACEHOLDER_REGEX.lastIndex = 0; // The regex is global: always restart from the beginning.
  while ((match = PLACEHOLDER_REGEX.exec(text)) !== null) {
    // Use the offset of THIS match (not a fresh text search) so that identical
    // placeholders repeated on several pages each get their own page number.
    // Matches and form feeds are both ascending, so one forward walk suffices.
    while (breaksPassed < formFeeds.length && formFeeds[breaksPassed] < match.index) {
      breaksPassed += 1;
    }
    found.push({
      page: breaksPassed + 1,
      label: match[1].trim(),
      role: match[2].trim(),
      type: match[3].trim(),
      height: Number.parseInt(match[4], 10),
      width: Number.parseInt(match[5], 10),
      fullMatch: match[0],
    });
  }
  return found;
}

/**
 * Renders `value` as a single-quoted JavaScript string literal, escaping the
 * characters that would otherwise break the generated snippet
 * (e.g. the apostrophe in "Signature de l'employeur").
 *
 * @param {string} value - Raw text.
 * @returns {string} The quoted, escaped literal.
 */
function toSingleQuotedLiteral(value) {
  const escaped = value
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/\r/g, '\\r')
    .replace(/\n/g, '\\n');
  return `'${escaped}'`;
}

/**
 * Derives the JSON output path from the PDF path.
 *
 * Only a trailing `.pdf` extension (any letter case) is stripped, so the
 * result can never equal the input path and directory names containing
 * ".pdf" are left untouched.
 *
 * @param {string} pdfPath - Path of the analysed PDF.
 * @returns {string} Path of the `-placeholders.json` file.
 */
function placeholdersJsonPath(pdfPath) {
  const ext = path.extname(pdfPath);
  const stem = ext.toLowerCase() === '.pdf' ? pdfPath.slice(0, -ext.length) : pdfPath;
  return `${stem}-placeholders.json`;
}

/**
 * Prints a section title framed by two horizontal rules.
 *
 * @param {string} title - Title line.
 */
function printBanner(title) {
  const rule = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';
  console.log(rule);
  console.log(title);
  console.log(`${rule}\n`);
}

/**
 * Prints the human-readable list of detected placeholders.
 *
 * @param {Array<object>} placeholders - Result of `findPlaceholders`.
 */
function printPlaceholderReport(placeholders) {
  printBanner('📍 PLACEHOLDERS DÉTECTÉS');
  placeholders.forEach((ph, idx) => {
    console.log(`${idx + 1}. ${ph.label}`);
    console.log(` Rôle: ${ph.role}`);
    console.log(` Page: ${ph.page}`);
    console.log(` Dimensions: ${ph.width} × ${ph.height} mm`);
    console.log(` Type: ${ph.type}\n`);
  });
}

/**
 * Prints a ready-to-paste `positions` array for test-odentas-sign.js.
 * The x/y values are fixed defaults that the user is expected to adjust.
 *
 * @param {Array<object>} placeholders - Result of `findPlaceholders`.
 */
function printPositionsSnippet(placeholders) {
  printBanner('📋 CODE POUR test-odentas-sign.js');
  console.log('positions: [');
  placeholders.forEach((ph) => {
    console.log(` {`);
    console.log(` role: ${toSingleQuotedLiteral(ph.role)},`);
    console.log(` page: ${ph.page},`);
    console.log(` x: 100, // À ajuster selon la position exacte`);
    console.log(` y: 680, // À ajuster selon la position exacte`);
    console.log(` w: ${ph.width},`);
    console.log(` h: ${ph.height},`);
    console.log(` kind: 'signature',`);
    console.log(` label: ${toSingleQuotedLiteral(ph.label)},`);
    console.log(` },`);
  });
  console.log(']');
}

/**
 * Scans a PDF for signature placeholders, prints a report plus a code snippet,
 * and saves the placeholders as JSON next to the PDF
 * (`<name>-placeholders.json`).
 *
 * The file is decoded byte-for-byte as latin1 and searched as plain text, so
 * only placeholders that appear verbatim in the raw bytes are found.
 *
 * On any error the message is printed and the process exits with code 1.
 * When no placeholder is found a warning is printed and nothing is written.
 *
 * @param {string} pdfPath - Path of the PDF to analyse.
 * @returns {Promise<void>} Resolves once the report is printed and saved.
 */
async function extractPlaceholders(pdfPath) {
  console.log(`\n📄 Analyse du PDF: ${pdfPath}\n`);
  try {
    // Read the PDF as raw text; latin1 maps every byte to exactly one character.
    const pdfText = fs.readFileSync(pdfPath).toString('latin1');
    console.log(`✅ PDF chargé\n`);

    const placeholders = findPlaceholders(pdfText);
    if (placeholders.length === 0) {
      console.log('⚠️ Aucun placeholder trouvé!\n');
      return;
    }

    printPlaceholderReport(placeholders);
    printPositionsSnippet(placeholders);

    // Save as JSON.
    const outputPath = placeholdersJsonPath(pdfPath);
    fs.writeFileSync(outputPath, JSON.stringify(placeholders, null, 2));
    console.log(`\n💾 Résultats sauvegardés: ${outputPath}\n`);
  } catch (error) {
    console.error('❌ Erreur:', error.message);
    process.exit(1);
  }
}
// Entry point: analyse the PDF given as the first CLI argument, falling back
// to test-contrat.pdf located next to this script.
const pdfPath = process.argv[2] ? process.argv[2] : path.join(__dirname, 'test-contrat.pdf');
if (fs.existsSync(pdfPath)) {
  extractPlaceholders(pdfPath).catch((err) => console.error(err));
} else {
  // Nothing to analyse: report the missing file and stop with a failure code.
  console.error(`❌ Fichier non trouvé: ${pdfPath}`);
  process.exit(1);
}