View Javadoc

1   package org.varienaja.util.wikipedia;
2   
/**
 * Static helpers that strip wiki markup from plain text: links, quote runs,
 * entity-escaped elements, comments and image links.
 *
 * <p>Every method is a pure string transformation based on a precompiled regular
 * expression. None of the patterns enables DOTALL, so {@code .} never crosses a line
 * terminator: elements and comments are only removed when they start and end on the
 * same line. Passing {@code null} to any method throws {@link NullPointerException}.
 *
 * @author Varienaja
 */
public class WikiContentHelper {

    // Pattern is written fully qualified so this class needs no import statement.

    /** {@code [[target|label]]}; group 1 captures the label (everything up to the first {@code ]}). */
    private static final java.util.regex.Pattern PIPED_LINK =
            java.util.regex.Pattern.compile("\\[\\[[^\\|\\[\\]]*\\|([^\\]]*)\\]\\]");

    /** {@code [[target]]}; group 1 captures the target. */
    private static final java.util.regex.Pattern PLAIN_LINK =
            java.util.regex.Pattern.compile("\\[\\[([^\\]]*)\\]\\]");

    /** A run of two or more apostrophes (wiki italic, bold, or bold+italic markers). */
    private static final java.util.regex.Pattern QUOTE_RUN =
            java.util.regex.Pattern.compile("'{2,}");

    /**
     * An entity-escaped opening tag, its content, and the closing tag with the same name.
     * The quantifiers are reluctant so that each element is matched on its own instead of
     * one match spanning from the first opening tag to the last closing tag on the line.
     */
    private static final java.util.regex.Pattern ESCAPED_ELEMENT =
            java.util.regex.Pattern.compile("&lt;(\\w*).*?&gt;.*?&lt;/\\1&gt;");

    /** An entity-escaped comment; reluctant so adjacent comments are matched separately. */
    private static final java.util.regex.Pattern ESCAPED_COMMENT =
            java.util.regex.Pattern.compile("&lt;!--.*?--&gt;");

    /** A raw HTML comment; reluctant so adjacent comments are matched separately. */
    private static final java.util.regex.Pattern RAW_COMMENT =
            java.util.regex.Pattern.compile("<!--.*?-->");

    /** {@code [[Image...]]} up to the first {@code ]} that is followed by another {@code ]}. */
    private static final java.util.regex.Pattern IMAGE_LINK =
            java.util.regex.Pattern.compile("\\[\\[Image[^\\]]*\\]\\]");

    /**
     * Replaces wiki links by their visible text.
     *
     * <ul>
     *   <li>{@code [[xx|yy]]} becomes {@code yy}</li>
     *   <li>{@code [[xx]]} becomes {@code xx}</li>
     * </ul>
     *
     * @param input the text to process
     * @return the text with every matched link replaced by its label or target
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static String removeLinks(String input) {
        // Piped links first: once they are reduced to their label, the plain-link
        // pattern only sees the remaining [[target]] constructs.
        String withoutPipedLinks = PIPED_LINK.matcher(input).replaceAll("$1");
        return PLAIN_LINK.matcher(withoutPipedLinks).replaceAll("$1");
    }

    /**
     * Collapses every run of two or more apostrophes into a single apostrophe.
     *
     * <ul>
     *   <li>{@code ''italic''} becomes {@code 'italic'}</li>
     *   <li>{@code '''bold'''} becomes {@code 'bold'}</li>
     *   <li>{@code '''''both'''''} becomes {@code 'both'}</li>
     * </ul>
     *
     * <p>Single apostrophes, as in {@code it's}, are left untouched.
     *
     * @param content the text to process
     * @return the text with apostrophe runs collapsed
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String sanitizeQuotes(String content) {
        // One pass over whole runs: replacing ''' and then '' separately leaves
        // two apostrophes behind for a run of five.
        return QUOTE_RUN.matcher(content).replaceAll("'");
    }

    /**
     * Removes entity-escaped elements (opening tag, content and matching closing tag).
     * The tags are matched in their escaped form, i.e. the text literally contains
     * {@code &lt;} and {@code &gt;}.
     *
     * <ul>
     *   <li>{@code lalala.&lt;ref&gt;[sjalala]&lt;/ref&gt; lololo} becomes {@code lalala. lololo}</li>
     *   <li>{@code lalala.&lt;a href&gt;[sjalala]&lt;/a&gt; lololo} becomes {@code lalala. lololo}</li>
     * </ul>
     *
     * <p>Each element is removed individually; text between two elements on the same
     * line is kept. An element that spans several lines is not matched.
     *
     * @param content the text to process
     * @return the text without the matched elements
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String removeRefs(String content) {
        return ESCAPED_ELEMENT.matcher(content).replaceAll("");
    }

    /**
     * Removes HTML comments, both entity-escaped ({@code &lt;!-- ... --&gt;}) and raw
     * ({@code <!-- ... -->}).
     *
     * <p>Each comment is removed individually; text between two comments on the same
     * line is kept. A comment that spans several lines is not matched.
     *
     * @param content the text to process
     * @return the text without the matched comments
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String removeComment(String content) {
        String withoutEscapedComments = ESCAPED_COMMENT.matcher(content).replaceAll("");
        return RAW_COMMENT.matcher(withoutEscapedComments).replaceAll("");
    }

    /**
     * Removes image links such as
     * {@code [[Image:JohannesBrahms.jpg|thumb|right|250px|Johannes Brahms]]}.
     *
     * <p>The match ends at the first {@code ]}, so an image whose caption itself
     * contains a {@code [[link]]} is only removed up to the end of that inner link.
     *
     * @param content the text to process
     * @return the text without the matched image links
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String removeImages(String content) {
        return IMAGE_LINK.matcher(content).replaceAll("");
    }

}