printf.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. // Copyright 2020 The Libc Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package libc // import "modernc.org/libc"
  5. import (
  6. "bytes"
  7. "fmt"
  8. "runtime"
  9. "strconv"
  10. "strings"
  11. "unsafe"
  12. )
  13. const (
  14. modNone = iota
  15. modHH
  16. modH
  17. modL
  18. modLL
  19. modLD
  20. modQ
  21. modCapitalL
  22. modJ
  23. modZ
  24. modCapitalZ
  25. modT
  26. mod32
  27. mod64
  28. )
  29. // Format of the format string
  30. //
  31. // The format string is a character string, beginning and ending in its initial
  32. // shift state, if any. The format string is composed of zero or more
  33. // directives: ordinary characters (not %), which are copied unchanged to
  34. // the output stream; and conversion specifications, each of which results in
  35. // fetching zero or more subsequent arguments.
  36. func printf(format, args uintptr) []byte {
  37. format0 := format
  38. args0 := args
  39. buf := bytes.NewBuffer(nil)
  40. for {
  41. switch c := *(*byte)(unsafe.Pointer(format)); c {
  42. case '%':
  43. format = printfConversion(buf, format, &args)
  44. case 0:
  45. if dmesgs {
  46. dmesg("%v: %q, %#x -> %q", origin(1), GoString(format0), args0, buf.Bytes())
  47. }
  48. return buf.Bytes()
  49. default:
  50. format++
  51. buf.WriteByte(c)
  52. }
  53. }
  54. }
  55. // Each conversion specification is introduced by the character %, and ends
  56. // with a conversion specifier. In between there may be (in this order) zero
  57. // or more flags, an optional minimum field width, an optional precision and
  58. // an optional length modifier.
  59. func printfConversion(buf *bytes.Buffer, format uintptr, args *uintptr) uintptr {
  60. format++ // '%'
  61. spec := "%"
  62. // Flags characters
  63. //
  64. // The character % is followed by zero or more of the following flags:
  65. flags:
  66. for {
  67. switch c := *(*byte)(unsafe.Pointer(format)); c {
  68. case '#':
  69. // The value should be converted to an "alternate form". For o conversions,
  70. // the first character of the output string is made zero (by prefixing a 0 if
  71. // it was not zero already). For x and X conversions, a nonzero result has
  72. // the string "0x" (or "0X" for X conversions) prepended to it. For a, A, e,
  73. // E, f, F, g, and G conversions, the result will always contain a decimal
  74. // point, even if no digits follow it (normally, a decimal point appears in the
  75. // results of those conversions only if a digit follows). For g and G
  76. // conversions, trailing zeros are not removed from the result as they would
  77. // otherwise be. For other conversions, the result is undefined.
  78. format++
  79. spec += "#"
  80. case '0':
  81. // The value should be zero padded. For d, i, o, u, x, X, a, A, e, E, f, F,
  82. // g, and G conversions, the converted value is padded on the left with zeros
  83. // rather than blanks. If the 0 and - flags both appear, the 0 flag is
  84. // ignored. If a precision is given with a numeric conversion (d, i, o, u, x,
  85. // and X), the 0 flag is ignored. For other conversions, the behav‐ ior is
  86. // undefined.
  87. format++
  88. spec += "0"
  89. case '-':
  90. // The converted value is to be left adjusted on the field boundary. (The
  91. // default is right justification.) The converted value is padded on the right
  92. // with blanks, rather than on the left with blanks or zeros. A - overrides a
  93. // 0 if both are given.
  94. format++
  95. spec += "-"
  96. case ' ':
  97. // A blank should be left before a positive number (or empty string) produced
  98. // by a signed conversion.
  99. format++
  100. spec += " "
  101. case '+':
  102. // A sign (+ or -) should always be placed before a number produced by a signed
  103. // conversion. By default, a sign is used only for negative numbers. A +
  104. // overrides a space if both are used.
  105. format++
  106. spec += "+"
  107. default:
  108. break flags
  109. }
  110. }
  111. format, width, hasWidth := parseFieldWidth(format)
  112. if hasWidth {
  113. spec += strconv.Itoa(width)
  114. }
  115. format, prec, hasPrecision := parsePrecision(format, args)
  116. format, mod := parseLengthModifier(format)
  117. var str string
  118. more:
  119. // Conversion specifiers
  120. //
  121. // A character that specifies the type of conversion to be applied. The
  122. // conversion specifiers and their meanings are:
  123. switch c := *(*byte)(unsafe.Pointer(format)); c {
  124. case 'd', 'i':
  125. // The int argument is converted to signed decimal notation. The precision,
  126. // if any, gives the minimum number of digits that must appear; if the
  127. // converted value requires fewer digits, it is padded on the left with zeros.
  128. // The default precision is 1. When 0 is printed with an explicit precision 0,
  129. // the output is empty.
  130. format++
  131. var arg int64
  132. if isWindows && mod == modL {
  133. mod = modNone
  134. }
  135. switch mod {
  136. case modL, modLL, mod64:
  137. arg = VaInt64(args)
  138. case modH:
  139. arg = int64(int16(VaInt32(args)))
  140. case modHH:
  141. arg = int64(int8(VaInt32(args)))
  142. case mod32, modNone:
  143. arg = int64(VaInt32(args))
  144. default:
  145. panic(todo("", mod))
  146. }
  147. if arg == 0 && hasPrecision && prec == 0 {
  148. break
  149. }
  150. if hasPrecision {
  151. panic(todo("", prec))
  152. }
  153. f := spec + "d"
  154. str = fmt.Sprintf(f, arg)
  155. case 'u':
  156. // The unsigned int argument is converted to unsigned decimal notation. The
  157. // precision, if any, gives the minimum number of digits that must appear; if
  158. // the converted value requires fewer digits, it is padded on the left with
  159. // zeros. The default precision is 1. When 0 is printed with an explicit
  160. // precision 0, the output is empty.
  161. format++
  162. var arg uint64
  163. if isWindows && mod == modL {
  164. mod = modNone
  165. }
  166. switch mod {
  167. case modNone:
  168. arg = uint64(VaUint32(args))
  169. case modL, modLL, mod64:
  170. arg = VaUint64(args)
  171. case modH:
  172. arg = uint64(uint16(VaInt32(args)))
  173. case modHH:
  174. arg = uint64(uint8(VaInt32(args)))
  175. case mod32:
  176. arg = uint64(VaInt32(args))
  177. default:
  178. panic(todo("", mod))
  179. }
  180. if arg == 0 && hasPrecision && prec == 0 {
  181. break
  182. }
  183. if hasPrecision {
  184. panic(todo("", prec))
  185. }
  186. f := spec + "d"
  187. str = fmt.Sprintf(f, arg)
  188. case 'o':
  189. // The unsigned int argument is converted to unsigned octal notation. The
  190. // precision, if any, gives the minimum number of digits that must appear; if
  191. // the converted value requires fewer digits, it is padded on the left with
  192. // zeros. The default precision is 1. When 0 is printed with an explicit
  193. // precision 0, the output is empty.
  194. format++
  195. var arg uint64
  196. if isWindows && mod == modL {
  197. mod = modNone
  198. }
  199. switch mod {
  200. case modNone:
  201. arg = uint64(VaUint32(args))
  202. case modL, modLL, mod64:
  203. arg = VaUint64(args)
  204. case modH:
  205. arg = uint64(uint16(VaInt32(args)))
  206. case modHH:
  207. arg = uint64(uint8(VaInt32(args)))
  208. case mod32:
  209. arg = uint64(VaInt32(args))
  210. default:
  211. panic(todo("", mod))
  212. }
  213. if arg == 0 && hasPrecision && prec == 0 {
  214. break
  215. }
  216. if hasPrecision {
  217. panic(todo("", prec))
  218. }
  219. f := spec + "o"
  220. str = fmt.Sprintf(f, arg)
  221. case 'I':
  222. if !isWindows {
  223. panic(todo("%#U", c))
  224. }
  225. format++
  226. switch c = *(*byte)(unsafe.Pointer(format)); c {
  227. case 'x', 'X':
  228. // https://docs.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-wsprintfa
  229. //
  230. // Ix, IX
  231. //
  232. // 64-bit unsigned hexadecimal integer in lowercase or uppercase on 64-bit
  233. // platforms, 32-bit unsigned hexadecimal integer in lowercase or uppercase on
  234. // 32-bit platforms.
  235. if unsafe.Sizeof(int(0)) == 4 {
  236. mod = mod32
  237. }
  238. case '3':
  239. // https://en.wikipedia.org/wiki/Printf_format_string#Length_field
  240. //
  241. // I32 For integer types, causes printf to expect a 32-bit (double word) integer argument.
  242. format++
  243. switch c = *(*byte)(unsafe.Pointer(format)); c {
  244. case '2':
  245. format++
  246. mod = mod32
  247. goto more
  248. default:
  249. panic(todo("%#U", c))
  250. }
  251. case '6':
  252. // https://en.wikipedia.org/wiki/Printf_format_string#Length_field
  253. //
  254. // I64 For integer types, causes printf to expect a 64-bit (quad word) integer argument.
  255. format++
  256. switch c = *(*byte)(unsafe.Pointer(format)); c {
  257. case '4':
  258. format++
  259. mod = mod64
  260. goto more
  261. default:
  262. panic(todo("%#U", c))
  263. }
  264. default:
  265. panic(todo("%#U", c))
  266. }
  267. fallthrough
  268. case 'X':
  269. fallthrough
  270. case 'x':
  271. // The unsigned int argument is converted to unsigned hexadecimal notation.
  272. // The letters abcdef are used for x conversions; the letters ABCDEF are used
  273. // for X conversions. The precision, if any, gives the minimum number of
  274. // digits that must appear; if the converted value requires fewer digits, it is
  275. // padded on the left with zeros. The default precision is 1. When 0 is
  276. // printed with an explicit precision 0, the output is empty.
  277. format++
  278. var arg uint64
  279. if isWindows && mod == modL {
  280. mod = modNone
  281. }
  282. switch mod {
  283. case modNone:
  284. arg = uint64(VaUint32(args))
  285. case modL, modLL, mod64:
  286. arg = VaUint64(args)
  287. case modH:
  288. arg = uint64(uint16(VaInt32(args)))
  289. case modHH:
  290. arg = uint64(uint8(VaInt32(args)))
  291. case mod32:
  292. arg = uint64(VaInt32(args))
  293. default:
  294. panic(todo("", mod))
  295. }
  296. if arg == 0 && hasPrecision && prec == 0 {
  297. break
  298. }
  299. if strings.Contains(spec, "#") && arg == 0 {
  300. spec = strings.ReplaceAll(spec, "#", "")
  301. }
  302. var f string
  303. switch {
  304. case hasPrecision:
  305. f = fmt.Sprintf("%s.%d%c", spec, prec, c)
  306. default:
  307. f = spec + string(c)
  308. }
  309. str = fmt.Sprintf(f, arg)
  310. case 'e', 'E':
  311. // The double argument is rounded and converted in the style [-]d.ddde±dd where
  312. // there is one digit before the decimal-point character and the number of
  313. // digits after it is equal to the precision; if the precision is missing, it
  314. // is taken as 6; if the precision is zero, no decimal-point character appears.
  315. // An E conversion uses the letter E (rather than e) to intro‐ duce the
  316. // exponent. The exponent always contains at least two digits; if the value is
  317. // zero, the exponent is 00.
  318. format++
  319. arg := VaFloat64(args)
  320. if !hasPrecision {
  321. prec = 6
  322. }
  323. f := fmt.Sprintf("%s.%d%c", spec, prec, c)
  324. str = fmt.Sprintf(f, arg)
  325. case 'f', 'F':
  326. // The double argument is rounded and converted to decimal notation in the
  327. // style [-]ddd.ddd, where the number of digits after the decimal-point
  328. // character is equal to the precision specification. If the precision
  329. // is missing, it is taken as 6; if the precision is explicitly zero, no
  330. // decimal-point character appears. If a decimal point appears, at least one
  331. // digit appears before it.
  332. format++
  333. arg := VaFloat64(args)
  334. if !hasPrecision {
  335. prec = 6
  336. }
  337. f := fmt.Sprintf("%s.%d%c", spec, prec, c)
  338. str = fixNanInf(fmt.Sprintf(f, arg))
  339. case 'G':
  340. fallthrough
  341. case 'g':
  342. // The double argument is converted in style f or e (or F or E for G
  343. // conversions). The precision specifies the number of significant digits. If
  344. // the precision is missing, 6 digits are given; if the precision is zero, it
  345. // is treated as 1. Style e is used if the exponent from its conversion is
  346. // less than -4 or greater than or equal to the precision. Trailing zeros are
  347. // removed from the fractional part of the result; a decimal point appears only
  348. // if it is followed by at least one digit.
  349. format++
  350. arg := VaFloat64(args)
  351. if !hasPrecision {
  352. prec = 6
  353. }
  354. if prec == 0 {
  355. prec = 1
  356. }
  357. f := fmt.Sprintf("%s.%d%c", spec, prec, c)
  358. str = fixNanInf(fmt.Sprintf(f, arg))
  359. case 's':
  360. // If no l modifier is present: the const char * argument is expected to be a
  361. // pointer to an array of character type (pointer to a string). Characters
  362. // from the array are written up to (but not including) a terminating null byte
  363. // ('\0'); if a precision is specified, no more than the number specified are
  364. // written. If a precision is given, no null byte need be present; if
  365. // the precision is not specified, or is greater than the size of the array,
  366. // the array must contain a terminating null byte.
  367. //
  368. // If an l modifier is present: the const wchar_t * argument is expected
  369. // to be a pointer to an array of wide characters. Wide characters from the
  370. // array are converted to multibyte characters (each by a call to the
  371. // wcrtomb(3) function, with a conversion state starting in the initial state
  372. // before the first wide character), up to and including a terminating null
  373. // wide character. The resulting multibyte characters are written up to
  374. // (but not including) the terminating null byte. If a precision is specified,
  375. // no more bytes than the number specified are written, but no partial
  376. // multibyte characters are written. Note that the precision determines the
  377. // number of bytes written, not the number of wide characters or screen
  378. // positions. The array must contain a terminating null wide character,
  379. // unless a precision is given and it is so small that the number of bytes
  380. // written exceeds it before the end of the array is reached.
  381. format++
  382. arg := VaUintptr(args)
  383. switch mod {
  384. case modNone:
  385. var f string
  386. switch {
  387. case hasPrecision:
  388. f = fmt.Sprintf("%s.%ds", spec, prec)
  389. str = fmt.Sprintf(f, GoString(arg))
  390. default:
  391. f = spec + "s"
  392. str = fmt.Sprintf(f, GoString(arg))
  393. }
  394. default:
  395. panic(todo(""))
  396. }
  397. case 'p':
  398. // The void * pointer argument is printed in hexadecimal (as if by %#x or
  399. // %#lx).
  400. format++
  401. switch runtime.GOOS {
  402. case "windows":
  403. switch runtime.GOARCH {
  404. case "386", "arm":
  405. fmt.Fprintf(buf, "%08X", VaUintptr(args))
  406. default:
  407. fmt.Fprintf(buf, "%016X", VaUintptr(args))
  408. }
  409. default:
  410. fmt.Fprintf(buf, "%#0x", VaUintptr(args))
  411. }
  412. case 'c':
  413. // If no l modifier is present, the int argument is converted to an unsigned
  414. // char, and the resulting character is written. If an l modifier is present,
  415. // the wint_t (wide character) ar‐ gument is converted to a multibyte sequence
  416. // by a call to the wcrtomb(3) function, with a conversion state starting in
  417. // the initial state, and the resulting multibyte string is writ‐ ten.
  418. format++
  419. switch mod {
  420. case modNone:
  421. arg := VaInt32(args)
  422. buf.WriteByte(byte(arg))
  423. default:
  424. panic(todo(""))
  425. }
  426. case '%':
  427. // A '%' is written. No argument is converted. The complete conversion
  428. // specification is '%%'.
  429. format++
  430. buf.WriteByte('%')
  431. default:
  432. panic(todo("%#U", c))
  433. }
  434. buf.WriteString(str)
  435. return format
  436. }
  437. // Field width
  438. //
  439. // An optional decimal digit string (with nonzero first digit) specifying a
  440. // minimum field width. If the converted value has fewer characters than the
  441. // field width, it will be padded with spa‐ ces on the left (or right, if the
  442. // left-adjustment flag has been given). Instead of a decimal digit string one
  443. // may write "*" or "*m$" (for some decimal integer m) to specify that the
  444. // field width is given in the next argument, or in the m-th argument,
  445. // respectively, which must be of type int. A negative field width is taken as
  446. // a '-' flag followed by a positive field width. In no case does a
  447. // nonexistent or small field width cause truncation of a field; if the result
  448. // of a conversion is wider than the field width, the field is expanded to
  449. // contain the conversion result.
  450. func parseFieldWidth(format uintptr) (_ uintptr, n int, ok bool) {
  451. first := true
  452. for {
  453. var digit int
  454. switch c := *(*byte)(unsafe.Pointer(format)); {
  455. case first && c == '0':
  456. return format, n, ok
  457. case first && c == '*':
  458. panic(todo(""))
  459. case c >= '0' && c <= '9':
  460. format++
  461. ok = true
  462. first = false
  463. digit = int(c) - '0'
  464. default:
  465. return format, n, ok
  466. }
  467. n0 := n
  468. n = 10*n + digit
  469. if n < n0 {
  470. panic(todo(""))
  471. }
  472. }
  473. }
  474. // Precision
  475. //
  476. // An optional precision, in the form of a period ('.') followed by an
  477. // optional decimal digit string. Instead of a decimal digit string one may
  478. // write "*" or "*m$" (for some decimal integer m) to specify that the
  479. // precision is given in the next argument, or in the m-th argument,
  480. // respectively, which must be of type int. If the precision is given as just
  481. // '.', the precision is taken to be zero. A negative precision is taken
  482. // as if the precision were omitted. This gives the minimum number of digits
  483. // to appear for d, i, o, u, x, and X conversions, the number of digits to
  484. // appear after the radix character for a, A, e, E, f, and F conversions, the
  485. // maximum number of significant digits for g and G conversions, or the maximum
  486. // number of characters to be printed from a string for s and S conversions.
  487. func parsePrecision(format uintptr, args *uintptr) (_ uintptr, n int, ok bool) {
  488. for {
  489. switch c := *(*byte)(unsafe.Pointer(format)); c {
  490. case '.':
  491. format++
  492. first := true
  493. for {
  494. switch c := *(*byte)(unsafe.Pointer(format)); {
  495. case first && c == '*':
  496. format++
  497. n = int(VaInt32(args))
  498. return format, n, true
  499. case c >= '0' && c <= '9':
  500. format++
  501. first = false
  502. n0 := n
  503. n = 10*n + (int(c) - '0')
  504. if n < n0 {
  505. panic(todo(""))
  506. }
  507. default:
  508. return format, n, true
  509. }
  510. }
  511. default:
  512. return format, 0, false
  513. }
  514. }
  515. }
  516. // Length modifier
  517. //
  518. // Here, "integer conversion" stands for d, i, o, u, x, or X conversion.
  519. //
  520. // hh A following integer conversion corresponds to a signed char or
  521. // unsigned char argument, or a following n conversion corresponds to a pointer
  522. // to a signed char argument.
  523. //
  524. // h A following integer conversion corresponds to a short int or unsigned
  525. // short int argument, or a following n conversion corresponds to a pointer to
  526. // a short int argument.
  527. //
  528. // l (ell) A following integer conversion corresponds to a long int or
  529. // unsigned long int argument, or a following n conversion corresponds to a
  530. // pointer to a long int argument, or a fol‐ lowing c conversion corresponds to
  531. // a wint_t argument, or a following s conversion corresponds to a pointer to
  532. // wchar_t argument.
  533. //
  534. // ll (ell-ell). A following integer conversion corresponds to a long long
  535. // int or unsigned long long int argument, or a following n conversion
  536. // corresponds to a pointer to a long long int argument.
  537. //
  538. // q A synonym for ll. This is a nonstandard extension, derived from BSD;
  539. // avoid its use in new code.
  540. //
  541. // L A following a, A, e, E, f, F, g, or G conversion corresponds to a
  542. // long double argument. (C99 allows %LF, but SUSv2 does not.)
  543. //
  544. // j A following integer conversion corresponds to an intmax_t or
  545. // uintmax_t argument, or a following n conversion corresponds to a pointer to
  546. // an intmax_t argument.
  547. //
  548. // z A following integer conversion corresponds to a size_t or ssize_t
  549. // argument, or a following n conversion corresponds to a pointer to a size_t
  550. // argument.
  551. //
  552. // Z A nonstandard synonym for z that predates the appearance of z. Do
  553. // not use in new code.
  554. //
  555. // t A following integer conversion corresponds to a ptrdiff_t argument,
  556. // or a following n conversion corresponds to a pointer to a ptrdiff_t
  557. // argument.
  558. func parseLengthModifier(format uintptr) (_ uintptr, n int) {
  559. switch c := *(*byte)(unsafe.Pointer(format)); c {
  560. case 'h':
  561. format++
  562. n = modH
  563. switch c := *(*byte)(unsafe.Pointer(format)); c {
  564. case 'h':
  565. format++
  566. n = modHH
  567. }
  568. return format, n
  569. case 'l':
  570. format++
  571. n = modL
  572. switch c := *(*byte)(unsafe.Pointer(format)); c {
  573. case 'l':
  574. format++
  575. n = modLL
  576. }
  577. return format, n
  578. case 'q':
  579. panic(todo(""))
  580. case 'L':
  581. format++
  582. n = modLD
  583. return format, n
  584. case 'j':
  585. panic(todo(""))
  586. case 'z':
  587. panic(todo(""))
  588. case 'Z':
  589. panic(todo(""))
  590. case 't':
  591. panic(todo(""))
  592. default:
  593. return format, 0
  594. }
  595. }
  596. func fixNanInf(s string) string {
  597. switch s {
  598. case "NaN":
  599. return "nan"
  600. case "+Inf", "-Inf":
  601. return "inf"
  602. default:
  603. return s
  604. }
  605. }