Skip to content

Commit

Permalink
Merge pull request #40 from guzba/master
Browse files Browse the repository at this point in the history
1.4.0 handle windows utf16 in nim
  • Loading branch information
treeform authored Nov 7, 2021
2 parents 167a2ed + 8349003 commit 0064b79
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 95 deletions.
2 changes: 1 addition & 1 deletion puppy.nimble
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = "1.3.0"
version = "1.4.0"
author = "Andre von Houck"
description = "Puppy fetches HTML pages for Nim."
license = "MIT"
Expand Down
131 changes: 46 additions & 85 deletions src/puppy.nim
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ proc addDefaultHeaders(req: Request) =

when defined(windows) and not defined(puppyLibcurl):
# WinHTTP Windows
import puppy/windefs
import puppy/windefs, puppy/winutils
elif defined(macosx) and not defined(puppyLibcurl):
# AppKit macOS
import puppy/machttp
Expand All @@ -95,58 +95,12 @@ proc fetch*(req: Request): Response =
req.timeout = 60

when defined(windows) and not defined(puppyLibcurl):
proc wstr(str: string): string =
# Converts the UTF-8 `str` to wide characters using two MultiByteToWideChar
# calls. The returned Nim string is used purely as a byte buffer holding the
# converted data; callers cast its first byte's address to `ptr WCHAR`.
#
# First call: nil output buffer and a size of 0. Its return value is kept as
# `wlen` (presumably the required length in wide characters -- confirm
# against the Win32 documentation).
let wlen = MultiByteToWideChar(
CP_UTF8,
0,
str.cstring,
str.len.int32,
nil,
0
)
# Two bytes per wide character, plus one extra byte.
# NOTE(review): only one extra byte is reserved here, while a wide null
# terminator needs two -- confirm what supplies the second zero byte.
result.setLen(wlen * 2 + 1)
# Second call: writes the converted characters into `result`'s storage.
discard MultiByteToWideChar(
CP_UTF8,
0,
str.cstring,
str.len.int32,
cast[ptr WCHAR](result[0].addr),
wlen
)

proc `$`(p: ptr WCHAR): string =
# Converts the wide-character data at `p` to a UTF-8 Nim string using two
# WideCharToMultiByte calls. The input length is passed as -1 in both calls.
#
# First call: nil output buffer and a size of 0; the return value is kept as
# `len` and used below as the output size in bytes.
let len = WideCharToMultiByte(
CP_UTF8,
0,
p,
-1,
nil,
0,
nil,
nil
)
# When `len` is not positive nothing is converted and the result stays empty.
if len > 0:
result.setLen(len)
# Second call: writes the converted bytes into `result`'s storage.
discard WideCharToMultiByte(
CP_UTF8,
0,
p,
-1,
result[0].addr,
len,
nil,
nil
)
# The null terminator is included when -1 is used for the parameter length.
# Trim this null terminating character.
result.setLen(len - 1)

var hSession, hConnect, hRequest: HINTERNET
try:
let wideUserAgent = req.headers["user-agent"].wstr()
let wideUserAgent = req.headers["user-agent"].toUtf16()

hSession = WinHttpOpen(
cast[ptr WCHAR](wideUserAgent[0].unsafeAddr),
wideUserAgent[0].unsafeAddr,
WINHTTP_ACCESS_TYPE_AUTOMATIC_PROXY,
nil,
nil,
Expand Down Expand Up @@ -181,11 +135,11 @@ proc fetch*(req: Request): Response =
except ValueError as e:
raise newException(PuppyError, "Parsing port failed", e)

let wideHostname = req.url.hostname.wstr()
let wideHostname = req.url.hostname.toUtf16()

hConnect = WinHttpConnect(
hSession,
cast[ptr WCHAR](wideHostname[0].unsafeAddr),
wideHostname[0].unsafeAddr,
port,
0
)
Expand All @@ -203,20 +157,20 @@ proc fetch*(req: Request): Response =
objectName &= "?" & req.url.search

let
wideVerb = req.verb.toUpperAscii().wstr()
wideObjectName = objectName.wstr()
wideVerb = req.verb.toUpperAscii().toUtf16()
wideObjectName = objectName.toUtf16()

let
defaultAcceptType = "*/*".wstr()
defaultAcceptType = "*/*".toUtf16()
defaultacceptTypes = [
cast[ptr WCHAR](defaultAcceptType[0].unsafeAddr),
defaultAcceptType[0].unsafeAddr,
nil
]

hRequest = WinHttpOpenRequest(
hConnect,
cast[ptr WCHAR](wideVerb[0].unsafeAddr),
cast[ptr WCHAR](wideObjectName[0].unsafeAddr),
wideVerb[0].unsafeAddr,
wideObjectName[0].unsafeAddr,
nil,
nil,
cast[ptr ptr WCHAR](defaultacceptTypes.unsafeAddr),
Expand All @@ -231,11 +185,11 @@ proc fetch*(req: Request): Response =
for header in req.headers:
requestHeaderBuf &= header.key & ": " & header.value & CRLF

let wideRequestHeaderBuf = requestHeaderBuf.wstr()
let wideRequestHeaderBuf = requestHeaderBuf.toUtf16()

if WinHttpAddRequestHeaders(
hRequest,
cast[ptr WCHAR](wideRequestHeaderBuf[0].unsafeAddr),
wideRequestHeaderBuf[0].unsafeAddr,
-1,
(WINHTTP_ADDREQ_FLAG_ADD or WINHTTP_ADDREQ_FLAG_REPLACE).DWORD
) == 0:
Expand Down Expand Up @@ -278,35 +232,42 @@ proc fetch*(req: Request): Response =

result.code = statusCode

var responseHeaderBuf = newString(8192)
var
responseHeaderBytes: DWORD
responseHeaderBuf: seq[uint16]

proc readResponseHeaders() =
# Read the response headers. This may be called again after resizing
# the buffer.
var responseHeaderBytes = responseHeaderBuf.len.DWORD
if WinHttpQueryHeaders(
hRequest,
WINHTTP_QUERY_RAW_HEADERS_CRLF,
nil,
responseHeaderBuf.cstring,
responseHeaderBytes.addr,
nil
) == 0:
let errorCode = GetLastError()
if errorCode == ERROR_INSUFFICIENT_BUFFER:
responseHeaderBuf.setLen(responseHeaderBytes)
readResponseHeaders()
else:
raise newException(
PuppyError, "HttpQueryInfoW error: " & $errorCode
)
else:
responseHeaderBuf.setLen(responseHeaderBytes)
# Determine how big the header buffer needs to be
discard WinHttpQueryHeaders(
hRequest,
WINHTTP_QUERY_RAW_HEADERS_CRLF,
nil,
nil,
responseHeaderBytes.addr,
nil
)
let errorCode = GetLastError()
if errorCode == ERROR_INSUFFICIENT_BUFFER: # Expected!
# Set the header buffer to the correct size and include a null terminator
responseHeaderBuf.setLen(responseHeaderBytes div sizeof(uint16) + 1)
else:
raise newException(
PuppyError, "HttpQueryInfoW error: " & $errorCode
)

readResponseHeaders()
# Read the headers into the buffer
if WinHttpQueryHeaders(
hRequest,
WINHTTP_QUERY_RAW_HEADERS_CRLF,
nil,
responseHeaderBuf[0].addr,
responseHeaderBytes.addr,
nil
) == 0:
raise newException(
PuppyError, "HttpQueryInfoW error: " & $errorCode
)

let responseHeaders =
($cast[ptr WCHAR](responseHeaderBuf[0].addr)).split(CRLF)
let responseHeaders = responseHeaderBuf.toUtf8().split(CRLF)

template errorParsingResponseHeaders() =
raise newException(PuppyError, "Error parsing response headers")
Expand Down
9 changes: 0 additions & 9 deletions src/puppy/windefs.nim
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,6 @@ const

proc GetLastError*(): DWORD {.dynlib: "kernel32".}

proc MultiByteToWideChar*(
codePage: UINT,
dwFlags: DWORD,
lpMultiByteStr: LPCCH,
cbMultiByte: int32,
lpWideCharStr: LPWSTR,
cchWideChar: int32
): int32 {.dynlib: "kernel32".}

proc WideCharToMultiByte*(
codePage: UINT,
dwFlags: DWORD,
Expand Down
34 changes: 34 additions & 0 deletions src/puppy/winutils.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import puppy/common, unicode

proc toUtf16*(input: string): seq[uint16] =
  ## Encodes the UTF-8 `input` as UTF-16 code units and appends a trailing
  ## zero code unit, so the result always holds at least one element.
  ##
  ## Code points inside the surrogate range (U+D800..U+DFFF) or above
  ## U+10FFFF cannot be expressed in UTF-16 and produce no output.
  for rune in input.runes:
    let codePoint = rune.uint32
    if codePoint > 0x10FFFF'u32 or
        (codePoint >= 0xD800'u32 and codePoint <= 0xDFFF'u32):
      # Not representable; nothing is emitted for this rune.
      continue
    if codePoint <= 0xFFFF'u32:
      # Basic Multilingual Plane: one code unit carries the value as-is.
      result.add(codePoint.uint16)
    else:
      # Supplementary plane: split the 20-bit offset into a high and a low
      # surrogate carrying 10 bits each.
      let offset = codePoint - 0x10000'u32
      result.add((0xD800'u32 + (offset shr 10)).uint16)
      result.add((0xDC00'u32 + (offset and 0x3FF'u32)).uint16)
  result.add(0) # null terminator

proc toUtf8*(input: seq[uint16]): string =
  ## Decodes null-terminated UTF-16 code units into a UTF-8 string.
  ##
  ## The last element of `input` must be the zero terminator; it is not
  ## included in the result. Raises `PuppyError` when `input` is empty or
  ## when its last element is not zero.
  ##
  ## A surrogate that is not part of a valid high/low pair is decoded as
  ## U+FFFD (REPLACEMENT CHARACTER). The code unit that follows it is then
  ## decoded on its own instead of being discarded.
  if input.len == 0 or input[input.high] != 0:
    # An empty buffer has no terminator either; report it the same way
    # rather than failing on the index below.
    raise newException(PuppyError, "Missing UTF-16 null terminator")

  var i = 0
  while i < input.high:
    let u1 = input[i]
    inc i
    if u1 < 0xd800 or u1 > 0xdfff:
      # Not a surrogate: the code unit is the code point.
      result.add Rune(u1.int)
    else:
      # `i` is at most `input.high` here, so this read stays in bounds; in
      # the worst case it sees the terminator, which is not a low surrogate.
      let u2 = input[i]
      if ((u1 and 0xfc00) == 0xd800) and ((u2 and 0xfc00) == 0xdc00):
        # Valid pair: consume the low surrogate and combine both halves.
        # 0x35fdc00 == (0xd800 shl 10) + 0xdc00 - 0x10000.
        inc i
        result.add Rune((u1.uint32 shl 10) + u2.uint32 - 0x35fdc00)
      else:
        # Unpaired surrogate: emit U+FFFD and leave `u2` for the next
        # iteration so a valid following character is not lost.
        result.add "\uFFFD"

0 comments on commit 0064b79

Please sign in to comment.