11-- | These functions allow PureScript strings to be treated as if they were
22-- | sequences of Unicode code points instead of their true underlying
33-- | implementation (sequences of UTF-16 code units). For nearly all uses of
4- -- | strings, these functions should be preferred over the ones in Data.String.
4+ -- | strings, these functions should be preferred over the ones in `Data.String`.
55module Data.String.CodePoints
66 ( module StringReExports
77 , CodePoint ()
@@ -59,10 +59,34 @@ instance showCodePoint :: Show CodePoint where
5959-- I would prefer that this smart constructor not need to exist and instead
6060-- CodePoint just implements Enum, but the Enum module already depends on this
6161-- one. To avoid the circular dependency, we just expose these two functions.
62+ -- |
63+ -- | ```purescript
64+ -- | >>> it = codePointFromInt 0x1D400 -- U+1D400 MATHEMATICAL BOLD CAPITAL A
65+ -- | Just (CodePoint 0x1D400)
66+ -- |
67+ -- | >>> map singleton it
68+ -- | Just "𝐀"
69+ -- |
70+ -- | >>> codePointFromInt 0x110000 -- does not correspond to a Unicode code point
71+ -- | Nothing
72+ -- | ```
73+ -- |
6274codePointFromInt :: Int -> Maybe CodePoint
6375codePointFromInt n | 0 <= n && n <= 0x10FFFF = Just (CodePoint n)
6476codePointFromInt n = Nothing
6577
78+ -- |
79+ -- | ```purescript
80+ -- | >>> codePointToInt (codePointFromChar 'B')
81+ -- | 66
82+ -- |
83+ -- | >>> boldA = codePointFromInt 0x1D400
84+ -- | >>> boldA
85+ -- | Just (CodePoint 0x1D400)
86+ -- | >>> map codePointToInt boldA
87+ -- | Just 119808 -- is the same as 0x1D400
88+ -- | ```
89+ -- |
6690codePointToInt :: CodePoint -> Int
6791codePointToInt (CodePoint n) = n
6892
@@ -109,6 +133,15 @@ unsafeCodePointAt0Fallback s =
109133-- | Returns the first code point of the string after dropping the given number
110134-- | of code points from the beginning, if there is such a code point. Operates
111135-- | in constant space and in time linear to the given index.
136+ -- |
137+ -- | ```purescript
138+ -- | >>> codePointAt 1 "𝐀𝐀𝐀𝐀"
139+ -- | Just (CodePoint 0x1D400) -- represents "𝐀"
140+ -- | -- compare to Data.String:
141+ -- | >>> charAt 1 "𝐀𝐀𝐀𝐀"
142+ -- | Just '�'
143+ -- | ```
144+ -- |
112145codePointAt :: Int -> String -> Maybe CodePoint
113146codePointAt n _ | n < 0 = Nothing
114147codePointAt 0 " " = Nothing
@@ -133,6 +166,12 @@ codePointAtFallback n s = case uncons s of
133166-- | Returns the number of code points in the leading sequence of code points
134167-- | which all match the given predicate. Operates in constant space and in
135168-- | time linear to the length of the string.
169+ -- |
170+ -- | ```purescript
171+ -- | >>> count (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
172+ -- | 2
173+ -- | ```
174+ -- |
136175count :: (CodePoint -> Boolean ) -> String -> Int
137176count = _count countFallback unsafeCodePointAt0
138177
@@ -155,19 +194,43 @@ countTail p s accum = case uncons s of
155194-- | Drops the given number of code points from the beginning of the string. If
156195-- | the string does not have that many code points, returns the empty string.
157196-- | Operates in constant space and in time linear to the given number.
197+ -- |
198+ -- | ```purescript
199+ -- | >>> drop 5 "𝐀𝐀 b c"
200+ -- | "c"
201+ -- | -- compare to Data.String:
202+ -- | >>> drop 5 "𝐀𝐀 b c"
203+ -- | "b c" -- because "𝐀" occupies 2 code units
204+ -- | ```
205+ -- |
158206drop :: Int -> String -> String
159207drop n s = String .drop (String .length (take n s)) s
160208
161209
162210-- | Drops the leading sequence of code points which all match the given
163211-- | predicate from the string. Operates in constant space and in time linear
164212-- | to the length of the string.
213+ -- |
214+ -- | ```purescript
215+ -- | >>> dropWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
216+ -- | " b c 𝐀"
217+ -- | ```
218+ -- |
165219dropWhile :: (CodePoint -> Boolean ) -> String -> String
166220dropWhile p s = drop (count p s) s
167221
168222
169223-- | Creates a string from an array of code points. Operates in space and time
170224-- | linear to the length of the array.
225+ -- |
226+ -- | ```purescript
227+ -- | >>> codePointArray = toCodePointArray "c 𝐀"
228+ -- | >>> codePointArray
229+ -- | [CodePoint 0x63, CodePoint 0x20, CodePoint 0x1D400]
230+ -- | >>> fromCodePointArray codePointArray
231+ -- | "c 𝐀"
232+ -- | ```
233+ -- |
171234fromCodePointArray :: Array CodePoint -> String
172235fromCodePointArray = _fromCodePointArray singletonFallback
173236
@@ -178,13 +241,29 @@ foreign import _fromCodePointArray
178241
179242-- | Returns the number of code points preceding the first match of the given
180243-- | pattern in the string. Returns Nothing when no matches are found.
244+ -- |
245+ -- | ```purescript
246+ -- | >>> indexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀"
247+ -- | Just 2
248+ -- | >>> indexOf (Pattern "o") "b 𝐀𝐀 c 𝐀"
249+ -- | Nothing
250+ -- | ```
251+ -- |
181252indexOf :: String.Pattern -> String -> Maybe Int
182253indexOf p s = (\i -> length (String .take i s)) <$> String .indexOf p s
183254
184255
185256-- | Returns the number of code points preceding the first match of the given
186257-- | pattern in the string. Pattern matches preceding the given index will be
187258-- | ignored. Returns Nothing when no matches are found.
259+ -- |
260+ -- | ```purescript
261+ -- | >>> indexOf' (Pattern "𝐀") 4 "b 𝐀𝐀 c 𝐀"
262+ -- | Just 7
263+ -- | >>> indexOf' (Pattern "o") 4 "b 𝐀𝐀 c 𝐀"
264+ -- | Nothing
265+ -- | ```
266+ -- |
188267indexOf' :: String.Pattern -> Int -> String -> Maybe Int
189268indexOf' p i s =
190269 let s' = drop i s in
@@ -193,13 +272,29 @@ indexOf' p i s =
193272
194273-- | Returns the number of code points preceding the last match of the given
195274-- | pattern in the string. Returns Nothing when no matches are found.
275+ -- |
276+ -- | ```purescript
277+ -- | >>> lastIndexOf (Pattern "𝐀") "b 𝐀𝐀 c 𝐀"
278+ -- | Just 7
279+ -- | >>> lastIndexOf (Pattern "o") "b 𝐀𝐀 c 𝐀"
280+ -- | Nothing
281+ -- | ```
282+ -- |
196283lastIndexOf :: String.Pattern -> String -> Maybe Int
197284lastIndexOf p s = (\i -> length (String .take i s)) <$> String .lastIndexOf p s
198285
199286
200287-- | Returns the number of code points preceding the last match of the given
201288-- | pattern in the string. Pattern matches following the given index will be
202289-- | ignored. Returns Nothing when no matches are found.
290+ -- |
291+ -- | ```purescript
292+ -- | >>> lastIndexOf' (Pattern "𝐀") 5 "b 𝐀𝐀 c 𝐀"
293+ -- | Just 3
294+ -- | >>> lastIndexOf' (Pattern "o") 5 "b 𝐀𝐀 c 𝐀"
295+ -- | Nothing
296+ -- | ```
297+ -- |
203298lastIndexOf' :: String.Pattern -> Int -> String -> Maybe Int
204299lastIndexOf' p i s =
205300 let i' = String .length (take i s) in
@@ -208,12 +303,27 @@ lastIndexOf' p i s =
208303
209304-- | Returns the number of code points in the string. Operates in space and
210305-- | time linear to the length of the string.
306+ -- |
307+ -- | ```purescript
308+ -- | >>> length "b 𝐀𝐀 c 𝐀"
309+ -- | 8
310+ -- | -- compare to Data.String:
311+ -- | >>> length "b 𝐀𝐀 c 𝐀"
312+ -- | 11
313+ -- | ```
314+ -- |
211315length :: String -> Int
212316length = Array .length <<< toCodePointArray
213317
214318
215319-- | Creates a string containing just the given code point. Operates in
216320-- | constant space and time.
321+ -- |
322+ -- | ```purescript
323+ -- | >>> map singleton (codePointFromInt 0x1D400)
324+ -- | Just "𝐀"
325+ -- | ```
326+ -- |
217327singleton :: CodePoint -> String
218328singleton = _singleton singletonFallback
219329
@@ -233,6 +343,12 @@ singletonFallback (CodePoint cp) =
233343-- | Returns a record with strings created from the code points on either side
234344-- | of the given index. If the index is not within the string, Nothing is
235345-- | returned.
346+ -- |
347+ -- | ```purescript
348+ -- | >>> splitAt 3 "b 𝐀𝐀 c 𝐀"
349+ -- | Just { before: "b 𝐀", after: "𝐀 c 𝐀" }
350+ -- | ```
351+ -- |
236352splitAt :: Int -> String -> Maybe { before :: String , after :: String }
237353splitAt i s =
238354 let cps = toCodePointArray s in
@@ -248,6 +364,15 @@ splitAt i s =
248364-- | beginning of the given string. If the string does not have that many code
249365-- | points, returns the empty string. Operates in constant space and in time
250366-- | linear to the given number.
367+ -- |
368+ -- | ```purescript
369+ -- | >>> take 3 "b 𝐀𝐀 c 𝐀"
370+ -- | "b 𝐀"
371+ -- | -- compare to Data.String:
372+ -- | >>> take 3 "b 𝐀𝐀 c 𝐀"
373+ -- | "b �"
374+ -- | ```
375+ -- |
251376take :: Int -> String -> String
252377take = _take takeFallback
253378
@@ -263,12 +388,27 @@ takeFallback n s = case uncons s of
263388-- | Returns a string containing the leading sequence of code points which all
264389-- | match the given predicate from the string. Operates in constant space and
265390-- | in time linear to the length of the string.
391+ -- |
392+ -- | ```purescript
393+ -- | >>> takeWhile (\c -> codePointToInt c == 0x1D400) "𝐀𝐀 b c 𝐀"
394+ -- | "𝐀𝐀"
395+ -- | ```
396+ -- |
266397takeWhile :: (CodePoint -> Boolean ) -> String -> String
267398takeWhile p s = take (count p s) s
268399
269400
270401-- | Creates an array of code points from a string. Operates in space and time
271402-- | linear to the length of the string.
403+ -- |
404+ -- | ```purescript
405+ -- | >>> codePointArray = toCodePointArray "b 𝐀𝐀"
406+ -- | >>> codePointArray
407+ -- | [CodePoint 0x62, CodePoint 0x20, CodePoint 0x1D400, CodePoint 0x1D400]
408+ -- | >>> map singleton codePointArray
409+ -- | ["b", " ", "𝐀", "𝐀"]
410+ -- | ```
411+ -- |
272412toCodePointArray :: String -> Array CodePoint
273413toCodePointArray = _toCodePointArray toCodePointArrayFallback unsafeCodePointAt0
274414
@@ -288,6 +428,14 @@ unconsButWithTuple s = (\{ head, tail } -> Tuple head tail) <$> uncons s
288428-- | Returns a record with the first code point and the remaining code points
289429-- | of the string. Returns Nothing if the string is empty. Operates in
290430-- | constant space and time.
431+ -- |
432+ -- | ```purescript
433+ -- | >>> uncons "𝐀𝐀 c 𝐀"
434+ -- | Just { head: CodePoint 0x1D400, tail: "𝐀 c 𝐀" }
435+ -- | >>> uncons ""
436+ -- | Nothing
437+ -- | ```
438+ -- |
291439uncons :: String -> Maybe { head :: CodePoint , tail :: String }
292440uncons s = case String .length s of
293441 0 -> Nothing
0 commit comments