-
Notifications
You must be signed in to change notification settings - Fork 157
/
Copy pathIO.hs
312 lines (286 loc) · 12.1 KB
/
IO.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
{-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.Text.IO
-- Copyright : (c) 2009, 2010 Bryan O'Sullivan,
-- (c) 2009 Simon Marlow
-- License : BSD-style
-- Maintainer : bos@serpentine.com
-- Portability : GHC
--
-- Efficient locale-sensitive support for text I\/O.
--
-- The functions in this module obey the runtime system's locale,
-- character set encoding, and line ending conversion settings.
--
-- If you want to do I\/O using the UTF-8 encoding, use @Data.Text.IO.Utf8@,
-- which is faster than this module.
--
-- If you know in advance that you will be working with data that has
-- a specific encoding, and your application is highly
-- performance sensitive, you may find that it is faster to perform
-- I\/O with bytestrings and to encode and decode yourself than to use
-- the functions in this module.
module Data.Text.IO
(
-- * File-at-a-time operations
readFile
, writeFile
, appendFile
-- * Operations on handles
, hGetContents
, hGetChunk
, hGetLine
, hPutStr
, hPutStrLn
-- * Special cases for standard input and output
, interact
, getContents
, getLine
, putStr
, putStrLn
) where
import Data.Text (Text)
import Prelude hiding (appendFile, getContents, getLine, interact,
putStr, putStrLn, readFile, writeFile)
import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
withFile)
import qualified Control.Exception as E
import Control.Monad (liftM2, when)
import Data.IORef (readIORef, writeIORef)
import qualified Data.Text as T
import Data.Text.Internal.Fusion (stream)
import Data.Text.Internal.Fusion.Types (Step(..), Stream(..))
import Data.Text.Internal.IO (hGetLineWith, readChunk)
import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBufElem, CharBuffer,
RawCharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer,
writeCharBuf)
import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType))
import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle,
wantWritableHandle)
import GHC.IO.Handle.Text (commitBuffer')
import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..),
HandleType(..), Newline(..))
import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell)
import System.IO.Error (isEOFError)
-- | The 'readFile' function reads a file and returns the contents of
-- the file as a string. The entire file is read strictly, as with
-- 'getContents'.
--
-- Beware that this function (similarly to 'Prelude.readFile') is locale-dependent.
-- Unexpected system locale may cause your application to read corrupted data or
-- throw runtime exceptions about "invalid argument (invalid byte sequence)"
-- or "invalid argument (invalid character)". This is also slow, because GHC
-- first converts an entire input to UTF-32, which is afterwards converted to UTF-8.
--
-- If your data is UTF-8,
-- using 'Data.Text.Encoding.decodeUtf8' '.' 'Data.ByteString.readFile'
-- is a much faster and safer alternative.
readFile :: FilePath -> IO Text
readFile name = openFile name ReadMode >>= hGetContents
-- | Write a string to a file. The file is truncated to zero length
-- before writing begins.
writeFile :: FilePath -> Text -> IO ()
writeFile p = withFile p WriteMode . flip hPutStr
-- | Write a string to the end of a file.
appendFile :: FilePath -> Text -> IO ()
appendFile p = withFile p AppendMode . flip hPutStr
catchError :: String -> Handle -> Handle__ -> IOError -> IO (Text, Bool)
catchError caller h Handle__{..} err
| isEOFError err = do
buf <- readIORef haCharBuffer
return $ if isEmptyBuffer buf
then (T.empty, True)
else (T.singleton '\r', True)
| otherwise = E.throwIO (augmentIOError err caller h)
-- | Wrap readChunk and return a value indicating if we're reached the EOF.
-- This is needed because unpack_nl is unable to discern the difference
-- between a buffer with just \r due to EOF or because not enough data was left
-- for decoding. e.g. the final character decoded from the byte buffer was \r.
readChunkEof :: Handle__ -> CharBuffer -> IO (Text, Bool)
readChunkEof hh buf = do t <- readChunk hh buf
return (t, False)
-- | /Experimental./ Read a single chunk of strict text from a
-- 'Handle'. The size of the chunk depends on the amount of input
-- currently buffered.
--
-- This function blocks only if there is no data available, and EOF
-- has not yet been reached. Once EOF is reached, this function
-- returns an empty string instead of throwing an exception.
hGetChunk :: Handle -> IO Text
hGetChunk h = wantReadableHandle "hGetChunk" h readSingleChunk
where
readSingleChunk hh@Handle__{..} = do
buf <- readIORef haCharBuffer
(t, _) <- readChunkEof hh buf `E.catch` catchError "hGetChunk" h hh
return (hh, t)
-- | Read the remaining contents of a 'Handle' as a string. The
-- 'Handle' is closed once the contents have been read, or if an
-- exception is thrown.
--
-- Internally, this function reads a chunk at a time from the
-- lower-level buffering abstraction, and concatenates the chunks into
-- a single string once the entire file has been read.
--
-- As a result, it requires approximately twice as much memory as its
-- result to construct its result. For files more than a half of
-- available RAM in size, this may result in memory exhaustion.
hGetContents :: Handle -> IO Text
hGetContents h = do
chooseGoodBuffering h
wantReadableHandle "hGetContents" h readAll
where
readAll hh@Handle__{..} = do
let readChunks = do
buf <- readIORef haCharBuffer
(t, eof) <- readChunkEof hh buf
`E.catch` catchError "hGetContents" h hh
if eof
then return [t]
else (t:) `fmap` readChunks
ts <- readChunks
(hh', _) <- hClose_help hh
return (hh'{haType=ClosedHandle}, T.concat ts)
-- | Use a more efficient buffer size if we're reading in
-- block-buffered mode with the default buffer size. When we can
-- determine the size of the handle we're reading, set the buffer size
-- to that, so that we can read the entire file in one chunk.
-- Otherwise, use a buffer size of at least 16KB.
chooseGoodBuffering :: Handle -> IO ()
chooseGoodBuffering h = do
bufMode <- hGetBuffering h
case bufMode of
BlockBuffering Nothing -> do
d <- E.catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
if ioe_type e == InappropriateType
then return 16384 -- faster than the 2KB default
else E.throwIO e
when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromInteger $ d
_ -> return ()
-- | Read a single line from a handle.
hGetLine :: Handle -> IO Text
hGetLine = hGetLineWith T.concat
-- | Write a string to a handle.
hPutStr :: Handle -> Text -> IO ()
-- This function is lifted almost verbatim from GHC.IO.Handle.Text.
hPutStr h t = do
(buffer_mode, nl) <-
wantWritableHandle "hPutStr" h $ \h_ -> do
bmode <- getSpareBuffer h_
return (bmode, haOutputNL h_)
let str = stream t
case buffer_mode of
(NoBuffering, _) -> hPutChars h str
(LineBuffering, buf) -> writeLines h nl buf str
(BlockBuffering _, buf)
| nl == CRLF -> writeBlocksCRLF h buf str
| otherwise -> writeBlocksRaw h buf str
hPutChars :: Handle -> Stream Char -> IO ()
hPutChars h (Stream next0 s0 _len) = loop s0
where
loop !s = case next0 s of
Done -> return ()
Skip s' -> loop s'
Yield x s' -> hPutChar h x >> loop s'
-- The following functions are largely lifted from GHC.IO.Handle.Text,
-- but adapted to a coinductive stream of data instead of an inductive
-- list.
--
-- We have several variations of more or less the same code for
-- performance reasons. Splitting the original buffered write
-- function into line- and block-oriented versions gave us a 2.1x
-- performance improvement. Lifting out the raw/cooked newline
-- handling gave a few more percent on top.
writeLines :: Handle -> Newline -> Buffer CharBufElem -> Stream Char -> IO ()
writeLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0
where
outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
where
inner !s !n =
case next0 s of
Done -> commit n False{-no flush-} True{-release-} >> return ()
Skip s' -> inner s' n
Yield x s'
| n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
| x == '\n' -> do
n' <- if nl == CRLF
then do n1 <- writeCharBuf raw n '\r'
writeCharBuf raw n1 '\n'
else writeCharBuf raw n x
commit n' True{-needs flush-} False >>= outer s'
| otherwise -> writeCharBuf raw n x >>= inner s'
commit = commitBuffer h raw len
writeBlocksCRLF :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
writeBlocksCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0
where
outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
where
inner !s !n =
case next0 s of
Done -> commit n False{-no flush-} True{-release-} >> return ()
Skip s' -> inner s' n
Yield x s'
| n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
| x == '\n' -> do n1 <- writeCharBuf raw n '\r'
writeCharBuf raw n1 '\n' >>= inner s'
| otherwise -> writeCharBuf raw n x >>= inner s'
commit = commitBuffer h raw len
writeBlocksRaw :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
writeBlocksRaw h buf0 (Stream next0 s0 _len) = outer s0 buf0
where
outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
where
inner !s !n =
case next0 s of
Done -> commit n False{-no flush-} True{-release-} >> return ()
Skip s' -> inner s' n
Yield x s'
| n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
| otherwise -> writeCharBuf raw n x >>= inner s'
commit = commitBuffer h raw len
-- This function is completely lifted from GHC.IO.Handle.Text.
getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer)
getSpareBuffer Handle__{haCharBuffer=ref,
haBuffers=spare_ref,
haBufferMode=mode}
= do
case mode of
NoBuffering -> return (mode, error "no buffer!")
_ -> do
bufs <- readIORef spare_ref
buf <- readIORef ref
case bufs of
BufferListCons b rest -> do
writeIORef spare_ref rest
return ( mode, emptyBuffer b (bufSize buf) WriteBuffer)
BufferListNil -> do
new_buf <- newCharBuffer (bufSize buf) WriteBuffer
return (mode, new_buf)
-- This function is completely lifted from GHC.IO.Handle.Text.
commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool
-> IO CharBuffer
commitBuffer hdl !raw !sz !count flush release =
wantWritableHandle "commitAndReleaseBuffer" hdl $
commitBuffer' raw sz count flush release
{-# INLINE commitBuffer #-}
-- | Write a string to a handle, followed by a newline.
hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn h t = hPutStr h t >> hPutChar h '\n'
-- | The 'interact' function takes a function of type @Text -> Text@
-- as its argument. The entire input from the standard input device is
-- passed to this function as its argument, and the resulting string
-- is output on the standard output device.
interact :: (Text -> Text) -> IO ()
interact f = putStr . f =<< getContents
-- | Read all user input on 'stdin' as a single string.
getContents :: IO Text
getContents = hGetContents stdin
-- | Read a single line of user input from 'stdin'.
getLine :: IO Text
getLine = hGetLine stdin
-- | Write a string to 'stdout'.
putStr :: Text -> IO ()
putStr = hPutStr stdout
-- | Write a string to 'stdout', followed by a newline.
putStrLn :: Text -> IO ()
putStrLn = hPutStrLn stdout