Skip to content

Commit

Permalink
Extract charset encoding from HTTP headers or html headers.
Browse files Browse the repository at this point in the history
workaround for #451 and #50.
  • Loading branch information
gainan authored and tux-mind committed Mar 27, 2016
1 parent 4339b93 commit 70d9a70
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 6 deletions.
44 changes: 44 additions & 0 deletions cSploit/src/org/csploit/android/net/http/RequestParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RequestParser
{
Expand Down Expand Up @@ -478,4 +480,46 @@ public static ArrayList<HttpCookie> getCookiesFromHeaders(ArrayList<String> head

return null;
}

/**
 * Extracts the charset encoding from an HTTP Content-Type header value.
 *
 * Only the value of the {@code charset} parameter is returned; any other
 * parameter (e.g. {@code boundary=...}) appearing before or after it is
 * ignored, and surrounding single or double quotes are stripped.
 *
 * @param contentType content-type header to be parsed, may be null
 * @return the lower-cased charset name if one was found, or null when the
 *         header is null, has no charset parameter, or the parameter is empty.
 */
public static String getCharsetFromHeaders(String contentType){
  if (contentType == null)
    return null;

  // Lower-case with a fixed locale so the result does not depend on the
  // device language (e.g. Turkish dotless i would break "charset"/"iso").
  String header = contentType.toLowerCase(java.util.Locale.ROOT);

  // Capture only the charset token: stop at whitespace, ';', ',' or a quote
  // so that neighbouring parameters never leak into the returned value.
  Matcher m = Pattern.compile("\\bcharset\\s*=\\s*[\"']?([^\\s;\"',]+)").matcher(header);

  return m.find() ? m.group(1) : null;
}

/**
 * Extracts the charset encoding of a web page from the markup that precedes
 * its closing {@code </head>} tag (typically a {@code <meta>} element).
 *
 * Both {@code <meta charset="...">} and
 * {@code <meta http-equiv="Content-Type" content="...; charset=...">} forms
 * are recognised, with double quotes, single quotes or no quotes at all.
 *
 * @param body html body of the site to be parsed, may be null
 * @return the lower-cased charset name if one was found, or null when the
 *         body is null, has no {@code </head>} tag, or declares no charset.
 */
public static String getCharsetFromBody(String body) {
  if (body == null)
    return null;

  // Lower-case once, with a fixed locale, so tag and attribute matching is
  // case-insensitive and independent of the device language.
  String lowered = body.toLowerCase(java.util.Locale.ROOT);

  // Only the document head is inspected; without a closing head tag we give up.
  int headEnd = lowered.indexOf("</head>");
  if (headEnd == -1)
    return null;

  String head = lowered.substring(0, headEnd);

  // Accept the characters used by registered charset names (letters, digits,
  // '_', '.', ':', '+', '-') so names such as "shift_jis" are not truncated.
  // The optional quote is consumed outside the group, so the captured value
  // never contains quotes and is never empty.
  Matcher m = Pattern.compile("charset\\s*=\\s*[\"']?([a-z0-9_.:+-]+)").matcher(head);

  return m.find() ? m.group(1) : null;
}
}
26 changes: 20 additions & 6 deletions cSploit/src/org/csploit/android/net/http/proxy/StreamThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
*/
package org.csploit.android.net.http.proxy;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.csploit.android.core.Logger;
import org.csploit.android.core.Profiler;
import org.csploit.android.core.System;
import org.csploit.android.core.Logger;
import org.csploit.android.net.ByteBuffer;
import org.csploit.android.net.http.RequestParser;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class StreamThread implements Runnable
{
private final static String[] FILTERED_CONTENT_TYPES = new String[]
Expand Down Expand Up @@ -159,7 +159,21 @@ public void run(){

headers = patched;

mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes());
// try to get the charset encoding from the HTTP headers.
String charset = RequestParser.getCharsetFromHeaders(contentType);

// if we haven't found the charset encoding on the HTTP headers, try it out on the body.
if (charset == null) {
charset = RequestParser.getCharsetFromBody(body);
}

if (charset != null) {
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes(charset));
}
else {
// if we haven't found the charset encoding, just handle it on ByteBuffer()
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes());
}

mWriter.write(mBuffer.getData());
mWriter.flush();
Expand Down

0 comments on commit 70d9a70

Please sign in to comment.