scanf.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. // Copyright 2020 The Libc Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package libc // import "modernc.org/libc"
  5. import (
  6. "strings"
  7. "unsafe"
  8. )
  9. // The format string consists of a sequence of directives which describe how to
  10. // process the sequence of input characters. If processing of a directive
  11. // fails, no further input is read, and scanf() returns. A "failure" can
  12. // be either of the following: input failure, meaning that input characters
  13. // were unavailable, or matching failure, meaning that the input was
  14. // inappropriate.
  15. func scanf(r *strings.Reader, format, args uintptr) (nvalues int32) {
  16. // var src []byte //TODO-
  17. var ok bool
  18. out:
  19. for {
  20. c := *(*byte)(unsafe.Pointer(format))
  21. // src = append(src, c) //TODO-
  22. switch c {
  23. case '%':
  24. var n int
  25. var match bool
  26. format, n, match = scanfConversion(r, format, &args)
  27. if !match {
  28. break out
  29. }
  30. nvalues += int32(n)
  31. ok = true
  32. case 0:
  33. break out
  34. case ' ', '\t', '\n', '\r', '\v', '\f':
  35. format = skipWhiteSpace(format)
  36. ok = true
  37. next:
  38. for {
  39. c, err := r.ReadByte()
  40. if err != nil {
  41. break out
  42. }
  43. switch c {
  44. case ' ', '\t', '\n', '\r', '\v', '\f':
  45. // nop
  46. default:
  47. r.UnreadByte()
  48. break next
  49. }
  50. }
  51. default:
  52. c2, err := r.ReadByte()
  53. if err != nil {
  54. break out
  55. }
  56. if c2 != c {
  57. r.UnreadByte()
  58. break out
  59. }
  60. format++
  61. ok = true
  62. }
  63. }
  64. if ok {
  65. return nvalues
  66. }
  67. return -1 // stdio.EOF but not defined for windows
  68. }
  69. func scanfConversion(r *strings.Reader, format uintptr, args *uintptr) (_ uintptr, nvalues int, match bool) {
  70. format++ // '%'
  71. // Each conversion specification in format begins with either the character '%'
  72. // or the character sequence "%n$" (see below for the distinction) followed by:
  73. mod := 0
  74. width := -1
  75. flags:
  76. for {
  77. switch c := *(*byte)(unsafe.Pointer(format)); c {
  78. case '*':
  79. // An optional '*' assignment-suppression character: scanf() reads input as
  80. // directed by the conversion specification, but discards the input. No
  81. // corresponding pointer argument is re‐ quired, and this specification is not
  82. // included in the count of successful assignments returned by scanf().
  83. format++
  84. panic(todo(""))
  85. case '\'':
  86. // For decimal conversions, an optional quote character ('). This specifies
  87. // that the input number may include thousands' separators as defined by the
  88. // LC_NUMERIC category of the current locale. (See setlocale(3).) The quote
  89. // character may precede or follow the '*' assignment-suppression character.
  90. format++
  91. panic(todo(""))
  92. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  93. // An optional decimal integer which specifies the maximum field width.
  94. // Reading of characters stops either when this maximum is reached or when a
  95. // nonmatching character is found, whichever happens first. Most conversions
  96. // discard initial white space characters (the exceptions are noted below), and
  97. // these discarded characters don't count toward the maximum field width.
  98. // String input conversions store a terminating null byte ('\0') to mark the
  99. // end of the input; the maximum field width does not include this terminator.
  100. width = 0
  101. num:
  102. for {
  103. var digit int
  104. switch c := *(*byte)(unsafe.Pointer(format)); {
  105. default:
  106. break num
  107. case c >= '0' && c <= '9':
  108. format++
  109. digit = int(c) - '0'
  110. }
  111. width0 := width
  112. width = 10*width + digit
  113. if width < width0 {
  114. panic(todo(""))
  115. }
  116. }
  117. case 'h', 'j', 'l', 'L', 'q', 't', 'z':
  118. format, mod = parseLengthModifier(format)
  119. default:
  120. break flags
  121. }
  122. }
  123. // A conversion specifier that specifies the type of input conversion to be
  124. // performed.
  125. switch c := *(*byte)(unsafe.Pointer(format)); c {
  126. case '%':
  127. // Matches a literal '%'. That is, %% in the format string matches a single
  128. // input '%' character. No conversion is done (but initial white space
  129. // characters are discarded), and assign‐ ment does not occur.
  130. format++
  131. panic(todo(""))
  132. case 'd':
  133. // Matches an optionally signed decimal integer; the next pointer must be a
  134. // pointer to int.
  135. format++
  136. skipReaderWhiteSpace(r)
  137. var digit, n uint64
  138. allowSign := true
  139. neg := false
  140. dec:
  141. for ; width != 0; width-- {
  142. c, err := r.ReadByte()
  143. if err != nil {
  144. if match {
  145. break dec
  146. }
  147. panic(todo("", err))
  148. }
  149. if allowSign {
  150. switch c {
  151. case '-':
  152. allowSign = false
  153. neg = true
  154. continue
  155. case '+':
  156. allowSign = false
  157. continue
  158. }
  159. }
  160. switch {
  161. case c >= '0' && c <= '9':
  162. digit = uint64(c) - '0'
  163. default:
  164. r.UnreadByte()
  165. break dec
  166. }
  167. match = true
  168. n0 := n
  169. n = n*10 + digit
  170. if n < n0 {
  171. panic(todo(""))
  172. }
  173. }
  174. if !match {
  175. break
  176. }
  177. arg := VaUintptr(args)
  178. v := int64(n)
  179. if neg {
  180. v = -v
  181. }
  182. switch mod {
  183. case modNone:
  184. *(*int32)(unsafe.Pointer(arg)) = int32(v)
  185. case modH:
  186. *(*int16)(unsafe.Pointer(arg)) = int16(v)
  187. case modHH:
  188. *(*int8)(unsafe.Pointer(arg)) = int8(v)
  189. case modL:
  190. *(*long)(unsafe.Pointer(arg)) = long(n)
  191. default:
  192. panic(todo(""))
  193. }
  194. nvalues = 1
  195. case 'D':
  196. // Equivalent to ld; this exists only for backward compatibility. (Note:
  197. // thus only in libc4. In libc5 and glibc the %D is silently ignored, causing
  198. // old programs to fail mysteriously.)
  199. format++
  200. panic(todo(""))
  201. case 'i':
  202. // Matches an optionally signed integer; the next pointer must be a pointer to
  203. // int. The integer is read in base 16 if it begins with 0x or 0X, in base 8
  204. // if it begins with 0, and in base 10 otherwise. Only characters that
  205. // correspond to the base are used.
  206. format++
  207. panic(todo(""))
  208. case 'o':
  209. // Matches an unsigned octal integer; the next pointer must be a pointer to
  210. // unsigned int.
  211. format++
  212. panic(todo(""))
  213. case 'u':
  214. // Matches an unsigned decimal integer; the next pointer must be a pointer to
  215. // unsigned int.
  216. format++
  217. panic(todo(""))
  218. case 'x', 'X':
  219. // Matches an unsigned hexadecimal integer; the next pointer must be a pointer
  220. // to unsigned int.
  221. format++
  222. skipReaderWhiteSpace(r)
  223. var digit, n uint64
  224. allowPrefix := true
  225. var b []byte
  226. hex:
  227. for ; width != 0; width-- {
  228. c, err := r.ReadByte()
  229. if err != nil {
  230. if match {
  231. break hex
  232. }
  233. panic(todo("", err))
  234. }
  235. if allowPrefix {
  236. if len(b) == 1 && b[0] == '0' && (c == 'x' || c == 'X') {
  237. allowPrefix = false
  238. match = false
  239. b = nil
  240. continue
  241. }
  242. b = append(b, c)
  243. }
  244. switch {
  245. case c >= '0' && c <= '9':
  246. digit = uint64(c) - '0'
  247. case c >= 'a' && c <= 'f':
  248. digit = uint64(c) - 'a' + 10
  249. case c >= 'A' && c <= 'F':
  250. digit = uint64(c) - 'A' + 10
  251. default:
  252. r.UnreadByte()
  253. break hex
  254. }
  255. match = true
  256. n0 := n
  257. n = n<<4 + digit
  258. if n < n0 {
  259. panic(todo(""))
  260. }
  261. }
  262. if !match {
  263. break
  264. }
  265. arg := VaUintptr(args)
  266. switch mod {
  267. case modNone:
  268. *(*uint32)(unsafe.Pointer(arg)) = uint32(n)
  269. case modH:
  270. *(*uint16)(unsafe.Pointer(arg)) = uint16(n)
  271. case modHH:
  272. *(*byte)(unsafe.Pointer(arg)) = byte(n)
  273. case modL:
  274. *(*ulong)(unsafe.Pointer(arg)) = ulong(n)
  275. default:
  276. panic(todo(""))
  277. }
  278. nvalues = 1
  279. case 'f', 'e', 'g', 'E', 'a':
  280. // Matches an optionally signed floating-point number; the next pointer must be
  281. // a pointer to float.
  282. format++
  283. panic(todo(""))
  284. case 's':
  285. // Matches a sequence of non-white-space characters; the next pointer must be
  286. // a pointer to the initial element of a character array that is long enough to
  287. // hold the input sequence and the terminating null byte ('\0'), which is added
  288. // automatically. The input string stops at white space or at the maximum
  289. // field width, whichever occurs first.
  290. format++
  291. panic(todo(""))
  292. case 'c':
  293. // Matches a sequence of characters whose length is specified by the maximum
  294. // field width (default 1); the next pointer must be a pointer to char, and
  295. // there must be enough room for all the characters (no terminating null byte
  296. // is added). The usual skip of leading white space is suppressed. To skip
  297. // white space first, use an explicit space in the format.
  298. format++
  299. panic(todo(""))
  300. case '[':
  301. // Matches a nonempty sequence of characters from the specified set of
  302. // accepted characters; the next pointer must be a pointer to char, and there
  303. // must be enough room for all the char‐ acters in the string, plus a
  304. // terminating null byte. The usual skip of leading white space is suppressed.
  305. // The string is to be made up of characters in (or not in) a particular set;
  306. // the set is defined by the characters between the open bracket [ character
  307. // and a close bracket ] character. The set excludes those characters if the
  308. // first character after the open bracket is a circumflex (^). To include a
  309. // close bracket in the set, make it the first character after the open bracket
  310. // or the circumflex; any other position will end the set. The hyphen
  311. // character - is also special; when placed between two other characters, it
  312. // adds all intervening characters to the set. To include a hyphen, make it
  313. // the last character before the final close bracket. For instance, [^]0-9-]
  314. // means the set "everything except close bracket, zero through nine, and
  315. // hyphen". The string ends with the appearance of a character not in the
  316. // (or, with a circumflex, in) set or when the field width runs out.
  317. format++
  318. panic(todo(""))
  319. case 'p':
  320. // Matches a pointer value (as printed by %p in printf(3); the next pointer
  321. // must be a pointer to a pointer to void.
  322. format++
  323. skipReaderWhiteSpace(r)
  324. c, err := r.ReadByte()
  325. if err != nil {
  326. panic(todo(""))
  327. }
  328. if c != '0' {
  329. r.UnreadByte()
  330. panic(todo(""))
  331. }
  332. if c, err = r.ReadByte(); err != nil {
  333. panic(todo(""))
  334. }
  335. if c != 'x' && c != 'X' {
  336. r.UnreadByte()
  337. panic(todo(""))
  338. }
  339. var digit, n uint64
  340. ptr:
  341. for ; width != 0; width-- {
  342. c, err := r.ReadByte()
  343. if err != nil {
  344. if match {
  345. break ptr
  346. }
  347. panic(todo(""))
  348. }
  349. switch {
  350. case c >= '0' && c <= '9':
  351. digit = uint64(c) - '0'
  352. case c >= 'a' && c <= 'f':
  353. digit = uint64(c) - 'a' + 10
  354. case c >= 'A' && c <= 'F':
  355. digit = uint64(c) - 'A' + 10
  356. default:
  357. r.UnreadByte()
  358. break ptr
  359. }
  360. match = true
  361. n0 := n
  362. n = n<<4 + digit
  363. if n < n0 {
  364. panic(todo(""))
  365. }
  366. }
  367. if !match {
  368. break
  369. }
  370. arg := VaUintptr(args)
  371. *(*uintptr)(unsafe.Pointer(arg)) = uintptr(n)
  372. nvalues = 1
  373. case 'n':
  374. // Nothing is expected; instead, the number of characters consumed thus far
  375. // from the input is stored through the next pointer, which must be a pointer
  376. // to int. This is not a conversion and does not increase the count returned
  377. // by the function. The assignment can be suppressed with the *
  378. // assignment-suppression character, but the effect on the return value is
  379. // undefined. Therefore %*n conversions should not be used.
  380. format++
  381. panic(todo(""))
  382. default:
  383. panic(todo("%#U", c))
  384. }
  385. return format, nvalues, match
  386. }
  387. func skipReaderWhiteSpace(r *strings.Reader) error {
  388. for {
  389. c, err := r.ReadByte()
  390. if err != nil {
  391. return err
  392. }
  393. switch c {
  394. case ' ', '\t', '\n', '\r', '\v', '\f':
  395. // ok
  396. default:
  397. r.UnreadByte()
  398. return nil
  399. }
  400. }
  401. }
  402. func skipWhiteSpace(s uintptr) uintptr {
  403. for {
  404. switch c := *(*byte)(unsafe.Pointer(s)); c {
  405. case ' ', '\t', '\n', '\r', '\v', '\f':
  406. s++
  407. default:
  408. return s
  409. }
  410. }
  411. }