Convert HTML entities to text

One Star

Convert HTML entities to text

Hi,
Is there a component or a function to convert HTML entities to text?
(I use the Enterprise Edition in Java.)

Thank you,
Carole
One Star

Re: Convert HTML entities to text

Hi Carole
This topic 22254 will help you.
Regards,
Pedro
One Star

Re: Convert HTML entities to text

Hi,
In the end, I wrote a new routine with the following code:

package routines;
import java.util.*;
/**
 * String helper routines.
 *
 * Currently offers a single operation: replacing a fixed set of named HTML
 * character entities (for example "&amp;eacute;") by the character they stand for.
 * Non-ASCII characters are written as Unicode escapes so that the class does
 * not depend on the encoding used to compile the source file.
 */
public class StringUtils {

    /** Utility class: not meant to be instantiated. */
    private StringUtils() {}

    /**
     * Lookup table from a complete entity reference (leading '&' and trailing
     * ';' included) to its replacement text. Built once and never modified.
     */
    private static final Map<String, String> HTML_ENTITIES;

    static {
        Map<String, String> m = new HashMap<String, String>();

        // Markup-significant characters.
        m.put("&lt;", "<");
        m.put("&gt;", ">");
        m.put("&amp;", "&");
        m.put("&quot;", "\"");

        // Letter A and ligature AE.
        m.put("&agrave;", "\u00e0");
        m.put("&Agrave;", "\u00c0");
        m.put("&Aacute;", "\u00c1");
        m.put("&aacute;", "\u00e1");
        m.put("&acirc;", "\u00e2");
        m.put("&Acirc;", "\u00c2");
        m.put("&Atilde;", "\u00c3");
        m.put("&atilde;", "\u00e3");
        m.put("&auml;", "\u00e4");
        m.put("&Auml;", "\u00c4");
        m.put("&aring;", "\u00e5");
        m.put("&Aring;", "\u00c5");
        m.put("&aelig;", "\u00e6");
        m.put("&AElig;", "\u00c6");

        // Letters Y and C.
        m.put("&yacute;", "\u00fd");
        m.put("&yuml;", "\u00ff");
        m.put("&ccedil;", "\u00e7");
        m.put("&Ccedil;", "\u00c7");

        // Letter E.
        m.put("&eacute;", "\u00e9");
        m.put("&Eacute;", "\u00c9");
        m.put("&egrave;", "\u00e8");
        m.put("&Egrave;", "\u00c8");
        m.put("&ecirc;", "\u00ea");
        m.put("&Ecirc;", "\u00ca");
        m.put("&euml;", "\u00eb");
        m.put("&Euml;", "\u00cb");

        // Letter I.
        m.put("&Iacute;", "\u00cd");
        m.put("&iacute;", "\u00ed");
        m.put("&Icirc;", "\u00ce");
        m.put("&icirc;", "\u00ee");
        m.put("&Igrave;", "\u00cc");
        m.put("&igrave;", "\u00ec");
        m.put("&iuml;", "\u00ef");
        m.put("&Iuml;", "\u00cf");

        // Letter O.
        m.put("&oacute;", "\u00f3");
        m.put("&Oacute;", "\u00d3");
        m.put("&ocirc;", "\u00f4");
        m.put("&Ocirc;", "\u00d4");
        m.put("&Ograve;", "\u00d2");
        m.put("&ograve;", "\u00f2");
        m.put("&Otilde;", "\u00d5");
        m.put("&otilde;", "\u00f5");
        m.put("&ouml;", "\u00f6");
        m.put("&Ouml;", "\u00d6");
        m.put("&oslash;", "\u00f8");
        m.put("&Oslash;", "\u00d8");

        // N tilde, eth, thorn, sharp s.
        m.put("&Ntilde;", "\u00d1");
        m.put("&ETH;", "\u00d0");
        m.put("&eth;", "\u00f0");
        m.put("&THORN;", "\u00de");
        m.put("&thorn;", "\u00fe");
        m.put("&szlig;", "\u00df");

        // Letter U.
        m.put("&Uacute;", "\u00da");
        m.put("&uacute;", "\u00fa");
        m.put("&ugrave;", "\u00f9");
        m.put("&Ugrave;", "\u00d9");
        m.put("&ucirc;", "\u00fb");
        m.put("&Ucirc;", "\u00db");
        m.put("&uuml;", "\u00fc");
        m.put("&Uuml;", "\u00dc");

        // Symbols. The non-breaking space is turned into a plain space.
        m.put("&nbsp;", " ");
        m.put("&copy;", "\u00a9");
        m.put("&reg;", "\u00ae");
        // U+20AC is the euro sign; U+20A0 is the unrelated EURO-CURRENCY SIGN.
        m.put("&euro;", "\u20ac");

        HTML_ENTITIES = Collections.unmodifiableMap(m);
    }

    /**
     * unescapeHTML: replaces the known named HTML entities by their character.
     *
     * The input is scanned once from left to right. Text produced by a
     * replacement is never scanned again, so "&amp;amp;lt;" becomes "&amp;lt;"
     * and not "&lt;". Unknown entities, a lone '&' and an entity without a
     * closing ';' are copied to the result unchanged.
     *
     * {talendTypes} String
     *
     * {Category} User Defined
     *
     * {param} String("field") input: field to process
     *
     * {example} unescapeHTML("field") # "text of the field after processing".
     *
     * @param source text that may contain entity references; may be null
     * @return the text with known entities replaced, or null if source is null
     */
    public static final String unescapeHTML(String source) {
        // Fast path: nothing to decode (also makes a null input harmless).
        if (source == null || source.indexOf('&') < 0) {
            return source;
        }

        StringBuilder out = new StringBuilder(source.length());
        int pos = 0; // first character of source not yet copied to out
        while (pos < source.length()) {
            int amp = source.indexOf('&', pos);
            if (amp < 0) {
                break;
            }
            int semi = source.indexOf(';', amp);
            if (semi < 0) {
                // No ';' in the remainder: no further entity can be complete.
                break;
            }
            out.append(source, pos, amp);
            String replacement = HTML_ENTITIES.get(source.substring(amp, semi + 1));
            if (replacement != null) {
                out.append(replacement);
                pos = semi + 1;
            } else {
                // Not a known entity: keep the '&' and look for the next one
                // right after it, since an entity may start before this ';'.
                out.append('&');
                pos = amp + 1;
            }
        }
        out.append(source, pos, source.length());
        return out.toString();
    }
}