View Javadoc

1   package org.varienaja.util;
2   
3   import java.io.BufferedReader;
4   import java.io.IOException;
5   import java.io.InputStream;
6   import java.io.InputStreamReader;
7   import java.net.MalformedURLException;
8   import java.net.URL;
9   import java.text.DecimalFormat;
10  import java.text.DecimalFormatSymbols;
11  import java.text.NumberFormat;
12  import java.util.ArrayList;
13  import java.util.HashSet;
14  import java.util.LinkedList;
15  import java.util.List;
16  import java.util.Locale;
17  import java.util.Set;
18  import java.util.regex.Matcher;
19  import java.util.regex.Pattern;
20  
21  import org.apache.commons.codec.binary.Hex;
22  import org.apache.commons.io.IOUtils;
23  
24  /**
25   * Miscellaneous string utility methods.
26   * Copied from Subsonic.
27   * @author Sindre Mehus
28   * @version $Id: StringUtil.java,v 1.1 2010/03/16 18:55:42 varienaja Exp $
29   */
30  public final class StringUtil {
31  
32      public static final String ENCODING_LATIN = "ISO-8859-1";
33      public static final String ENCODING_UTF8 = "UTF-8";
34  
35      private static final String[][] HTML_SUBSTITUTIONS = {
36              {"&",  "&"},
37              {"<",  "&lt;"},
38              {">",  "&gt;"},
39              {"'",  "&#39;"},
40              {"\"", "&#34;"},
41      };
42  
43      private static final String[][] MIME_TYPES = {
44              {"mp3",  "audio/mpeg"},
45              {"mpg",  "video/mpeg"},
46              {"mpeg", "video/mpeg"},
47              {"mp4",  "audio/mp4"},
48              {"m4a",  "audio/mp4"},
49              {"mpg4", "audio/mp4"},
50              {"ogg",  "application/ogg"},
51      };
52  
53      /**
54       * Disallow external instantiation.
55       */
56      private StringUtil() {}
57  
58      /**
59       * Returns the specified string converted to a format suitable for
60       * HTML. All single-quote, double-quote, greater-than, less-than and
61       * ampersand characters are replaces with their corresponding HTML
62       * Character Entity code.
63       *
64       * @param s the string to convert
65       * @return the converted string
66       */
67      public static String toHtml(String s) {
68          if (s == null) {
69              return null;
70          }
71          for (String[] substitution : HTML_SUBSTITUTIONS) {
72              if (s.contains(substitution[0])) {
73                  s = s.replaceAll(substitution[0], substitution[1]);
74              }
75          }
76          return s;
77      }
78  
79      /**
80      * Returns the suffix (the substring after the last dot) of the given string. The dot
81      * is included in the returned suffix.
82      * @param s The string in question.
83      * @return The suffix, or an empty string if no suffix is found.
84      */
85      public static String getSuffix(String s) {
86          int index = s.lastIndexOf('.');
87          return index == -1 ? "" : s.substring(index);
88      }
89  
90      /**
91       * Removes the suffix (the substring after the last dot) of the given string. The dot is
92       * also removed.
93       * @param s The string in question, e.g., "foo.mp3".
94       * @return The string without the suffix, e.g., "foo".
95       */
96      public static String removeSuffix(String s) {
97          int index = s.lastIndexOf('.');
98          return index == -1 ? s : s.substring(0, index);
99      }
100 
101     /**
102     * Returns the proper MIME type for the given suffix.
103     * @param suffix The suffix, e.g., "mp3" or ".mp3".
104     * @return The corresponding MIME type, e.g., "audio/mpeg". If no MIME type is found,
105     *  <code>application/octet-stream</code> is returned.
106     */
107     public static String getMimeType(String suffix) {
108         for (String[] map : MIME_TYPES) {
109             if (map[0].equalsIgnoreCase(suffix) || ('.' + map[0]).equalsIgnoreCase(suffix)) {
110                 return map[1];
111             }
112         }
113         return "application/octet-stream";
114     }
115 
116     /**
117     * Converts a byte-count to a formatted string suitable for display to the user.
118     * For instance:
119     * <ul>
120     * <li><code>format(918)</code> returns <em>"918 B"</em>.</li>
121     * <li><code>format(98765)</code> returns <em>"96 KB"</em>.</li>
122     * <li><code>format(1238476)</code> returns <em>"1.2 MB"</em>.</li>
123     * </ul>
124     * This method assumes that 1 KB is 1024 bytes.
125     *
126     * @param byteCount The number of bytes.
127     * @return The formatted string.
128     */
129     public static synchronized String formatBytes(long byteCount) {
130 
131         // More than 1 GB?
132         if (byteCount >= 1024 * 1024 * 1024) {
133             NumberFormat gigaByteFormat = new DecimalFormat("0.00 GB", new DecimalFormatSymbols());
134             return gigaByteFormat.format((double) byteCount / (1024 * 1024 * 1024));
135         }
136 
137         // More than 1 MB?
138         if (byteCount >= 1024 * 1024) {
139             NumberFormat megaByteFormat = new DecimalFormat("0.0 MB", new DecimalFormatSymbols());
140             return megaByteFormat.format((double) byteCount / (1024 * 1024));
141         }
142 
143         // More than 1 KB?
144         if (byteCount >= 1024) {
145             NumberFormat kiloByteFormat = new DecimalFormat("0 KB", new DecimalFormatSymbols());
146             return kiloByteFormat.format((double) byteCount / 1024);
147         }
148 
149         return byteCount + " B";
150     }
151 
152     /**
153      * Splits the input string. White space is interpreted as separator token. Double quotes
154      * are interpreted as grouping operator. <br/>
155      * For instance, the input <code>"u2 rem "greatest hits""</code> will return an array with
156      * three elements: <code>{"u2", "rem", "greatest hits"}</code>
157      * @param input The input string.
158      * @return Array of elements.
159      */
160     public static String[] split(String input) {
161         if (input == null) {
162             return new String[0];
163         }
164 
165         Pattern pattern = Pattern.compile("\".*?\"|\\S+");
166         Matcher matcher = pattern.matcher(input);
167 
168         List<String> result = new ArrayList<String>();
169         while (matcher.find()) {
170             String element = matcher.group();
171             if (element.startsWith("\"") && element.endsWith("\"") && element.length() > 1) {
172                 element = element.substring(1, element.length() - 1);
173             }
174             result.add(element);
175         }
176 
177         return result.toArray(new String[0]);
178     }
179 
180     /**
181      * Reads lines from the given input stream. All lines are trimmed. Empty lines and lines starting
182      * with "#" are skipped. The input stream is always closed by this method.
183      * @param in The input stream to read from.
184      * @return Array of lines.
185      * @throws IOException If an I/O error occurs.
186      */
187     public static String[] readLines(InputStream in) throws IOException {
188         BufferedReader reader = null;
189 
190         try {
191             reader = new BufferedReader(new InputStreamReader(in));
192             List<String> result = new ArrayList<String>();
193             for (String line = reader.readLine(); line != null; line = reader.readLine()) {
194                 line = line.trim();
195                 if (!line.startsWith("#") && line.length() > 0) {
196                     result.add(line);
197                 }
198             }
199             return result.toArray(new String[0]);
200 
201         } finally {
202             IOUtils.closeQuietly(in);
203             IOUtils.closeQuietly(reader);
204         }
205     }
206 
207     /**
208      * Change protocol from "https" to "http" for the given URL. The port number is also changed,
209      * but not if the given URL is already "http".
210      * @param url The original URL.
211      * @param port The port number to use, for instance 443.
212      * @return The transformed URL.
213      * @throws MalformedURLException If the original URL is invalid.
214      */
215     public static String toHttpUrl(String url, int port) throws MalformedURLException {
216         URL u = new URL(url);
217         if ("https".equals(u.getProtocol())) {
218             return new URL("http", u.getHost(), port, u.getFile()).toString();
219         }
220         return url;
221     }
222 
223     /**
224      * Determines whether a is equal to b, taking null into account.
225      * @return Whether a and b are equal, or both null.
226      */
227     public static boolean isEqual(Object a, Object b) {
228         return a == null ? b == null : a.equals(b);
229     }
230 
231     /**
232      * Parses a locale from the given string.
233      * @param s The locale string. Should be formatted as per the documentation in {@link Locale#toString()}.
234      * @return The locale.
235      */
236     public static Locale parseLocale(String s) {
237         if (s == null) {
238             return null;
239         }
240 
241         String[] elements = s.split("_");
242 
243         if (elements.length == 0) {
244             return new Locale(s, "", "");
245         }
246         if (elements.length == 1) {
247             return new Locale(elements[0], "", "");
248         }
249         if (elements.length == 2) {
250             return new Locale(elements[0], elements[1], "");
251         }
252         return new Locale(elements[0], elements[1], elements[2]);
253     }
254 
255     /**
256      * Encodes the given string by using the hexadecimal representation of its UTF-8 bytes.
257      * @param s The string to encode.
258      * @return The encoded string.
259      * @throws Exception If an error occurs.
260      */
261     public static String utf8HexEncode(String s) throws Exception {
262         if (s == null) {
263             return null;
264         }
265         byte[] utf8 = s.getBytes(ENCODING_UTF8);
266         return String.valueOf(Hex.encodeHex(utf8));
267     }
268 
269     /**
270      * Decodes the given string by using the hexadecimal representation of its UTF-8 bytes.
271      * @param s The string to decode.
272      * @return The decoded string.
273      * @throws Exception If an error occurs.
274      */
275     public static String utf8HexDecode(String s) throws Exception {
276         if (s == null) {
277             return null;
278         }
279         return new String(Hex.decodeHex(s.toCharArray()), ENCODING_UTF8);
280     }
281     
282 	/**
283 	 * Parses the String of comma separated words and returns a
284 	 * list of the word strings. The first character of each 
285 	 * word is in uppercase. The resulting list contains each word at most one
286 	 * time.
287 	 *  
288 	 * @param words The string of comma separated words.
289 	 * @return A list of strings of the words in the string.
290 	 */
291     public static List<String> getIndividualWords(String words) {
292     	Set<String> internal = new HashSet<String>();
293 		List<String> result = new LinkedList<String>();
294 		if (words!=null) {
295 			String[] kws = words.split("\\s*,\\s*");
296 			for(String kw: kws) {
297 				String word = capitalize(kw.trim());
298 				if (internal.add(word)) {
299 					result.add(word);
300 				}
301 			}
302 		}
303 		return result;
304     }
305     
306     /**
307      * Changes the first letter into a capital.
308      * @param word The word to capitalize.
309      * @return The capitalized string.
310      */
311     public static String capitalize(String word) {
312 		if(word!=null && word.length()>0) {
313 			String capWord = Character.toUpperCase(word.charAt(0)) + word.substring(1);
314 			return capWord;
315 		} else {
316 			return word;
317 		}
318     }
319     
320 	/**
321 	 * Scans (case insensitive) if a String is present in the List of Strings.
322 	 * @param items The items
323 	 * @param query The query
324 	 * @return Whether the query is contained in the items.
325 	 */
326 	public static boolean contains(List<String> items, String query) {
327 		if (query==null) return false;
328 		query = query.toLowerCase();
329 		
330 		for (String candidate : items) {
331 			if (candidate!=null) {
332 				if (query.equals(candidate.toLowerCase())) {
333 					return true;
334 				}
335 			}
336 		}
337 		return false;
338 	}
339 
340 }