nfp.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800
  1. // Copyright 2022 The nfp Authors. All rights reserved. Use of this source code
  2. // is governed by a BSD-style license that can be found in the LICENSE file.
  3. //
  4. // This package NFP (Number Format Parser) produces syntax trees for number
  5. // format expressions. An Excel number format controls options such as the number of
  6. // decimal digits, the currency sign, commas to separate thousands, and
  7. // display of negative numbers. The number format of an index applies wherever
  8. // that index is used, including row or column headers of a table, or graph
  9. // axis that uses that index.
  10. //
  11. // Implementation with Go language by Ri Xu: https://xuri.me
  12. package nfp
  13. import "strings"
  14. // Asterisk, At and other's constants are token definitions.
  15. const (
  16. // Character constants
  17. Asterisk = "*"
  18. At = "@"
  19. BackSlash = "\\"
  20. BlockDelimiter = ";"
  21. BracketClose = "]"
  22. BracketOpen = "["
  23. Colon = ":"
  24. Comma = ","
  25. Dash = "-"
  26. Dollar = "$"
  27. Dot = "."
  28. Hash = "#"
  29. ParenClose = ")"
  30. ParenOpen = "("
  31. Percent = "%"
  32. Plus = "+"
  33. Question = "?"
  34. QuoteDouble = "\""
  35. QuoteSingle = "'"
  36. Slash = "/"
  37. Underscore = "_"
  38. Whitespace = " "
  39. Zero = "0"
  40. // DatesTimesCodeChars defined dates and times control codes in upper case
  41. DatesTimesCodeChars = "EYMDHSG"
  42. // NumCodeChars defined numeric code character
  43. NumCodeChars = "0123456789"
  44. // Token section types
  45. TokenSectionNegative = "Negative"
  46. TokenSectionPositive = "Positive"
  47. TokenSectionText = "Text"
  48. TokenSectionZero = "Zero"
  49. // Token subtypes
  50. TokenSubTypeCurrencyString = "CurrencyString"
  51. TokenSubTypeLanguageInfo = "LanguageInfo"
  52. TokenTypeColor = "Color"
  53. // Token types
  54. TokenTypeCondition = "Condition"
  55. TokenTypeCurrencyLanguage = "CurrencyLanguage"
  56. TokenTypeDateTimes = "DateTimes"
  57. TokenTypeDecimalPoint = "DecimalPoint"
  58. TokenTypeDenominator = "Denominator"
  59. TokenTypeDigitalPlaceHolder = "DigitalPlaceHolder"
  60. TokenTypeElapsedDateTimes = "ElapsedDateTimes"
  61. TokenTypeExponential = "Exponential"
  62. TokenTypeFraction = "Fraction"
  63. TokenTypeGeneral = "General"
  64. TokenTypeHashPlaceHolder = "HashPlaceHolder"
  65. TokenTypeLiteral = "Literal"
  66. TokenTypeOperand = "Operand"
  67. TokenTypeOperator = "Operator"
  68. TokenTypePercent = "Percent"
  69. TokenTypeRepeatsChar = "RepeatsChar"
  70. TokenTypeSwitchArgument = "SwitchArgument"
  71. TokenTypeTextPlaceHolder = "TextPlaceHolder"
  72. TokenTypeThousandsSeparator = "ThousandsSeparator"
  73. TokenTypeUnknown = "Unknown"
  74. TokenTypeZeroPlaceHolder = "ZeroPlaceHolder"
  75. )
  76. // ColorNames defined colors name used in for a section of the format, use the
  77. // name of one of the following eight colors in square brackets in the
  78. // section. The color code shall be the first item in the section.
  79. var ColorNames = []string{
  80. "black",
  81. "blue",
  82. "cyan",
  83. "green",
  84. "magenta",
  85. "red",
  86. "white",
  87. "yellow",
  88. }
  89. // GeneralFormattingSwitchArguments defined switch-arguments apply to fields
  90. // whose field result is a numeric value. If the result type of the field is
  91. // not numeric, then these switches have no effect.
  92. var GeneralFormattingSwitchArguments = []string{
  93. "AIUEO",
  94. "ALPHABETIC",
  95. "alphabetic",
  96. "Arabic",
  97. "ARABICABJAD",
  98. "ARABICALPHA",
  99. "ArabicDash",
  100. "BAHTTEXT",
  101. "CardText",
  102. "CHINESENUM1",
  103. "CHINESENUM2",
  104. "CHINESENUM3",
  105. "CHOSUNG",
  106. "CIRCLENUM",
  107. "DBCHAR",
  108. "DBNUM1",
  109. "DBNUM2",
  110. "DBNUM3",
  111. "DBNUM4",
  112. "DollarText",
  113. "GANADA",
  114. "GB1",
  115. "GB2",
  116. "GB3",
  117. "GB4",
  118. "HEBREW1",
  119. "HEBREW2",
  120. "Hex",
  121. "HINDIARABIC",
  122. "HINDICARDTEXT",
  123. "HINDILETTER1",
  124. "HINDILETTER2",
  125. "IROHA",
  126. "KANJINUM1",
  127. "KANJINUM2",
  128. "KANJINUM3",
  129. "Ordinal",
  130. "OrdText",
  131. "Roman",
  132. "roman",
  133. "SBCHAR",
  134. "THAIARABIC",
  135. "THAICARDTEXT",
  136. "THAILETTER",
  137. "VIETCARDTEXT",
  138. "ZODIAC1",
  139. "ZODIAC2",
  140. "ZODIAC3",
  141. }
  142. // AmPm defined the AM and PM with international considerations.
  143. var AmPm = []string{"AM/PM", "A/P", "上午/下午"}
  144. // ConditionOperators defined the condition operators.
  145. var ConditionOperators = []string{"<", "<=", ">", ">=", "<>", "="}
  146. // Part directly maps the sub part of the token.
  147. type Part struct {
  148. Token Token
  149. Value string
  150. }
  151. // Token encapsulate a number format token.
  152. type Token struct {
  153. TValue string
  154. TType string
  155. Parts []Part
  156. }
  157. // Section directly maps sections of the number format. Up to four sections of
  158. // format codes can be specified. The format codes, separated by semicolons,
  159. // define the formats for positive numbers, negative numbers, zero values, and
  160. // text, in that order. If only two sections are specified, the first is used
  161. // for positive numbers and zeros, and the second is used for negative
  162. // numbers. If only one section is specified, it is used for all numbers. To
  163. // skip a section, the ending semicolon for that section shall be written.
  164. type Section struct {
  165. Type string
  166. Items []Token
  167. }
  168. // Tokens directly maps the ordered list of tokens.
  169. // Attributes:
  170. //
  171. // Index - Current position in the number format expression
  172. // SectionIndex - Current position in section
  173. // Sections - Ordered section of token sequences
  174. //
  175. type Tokens struct {
  176. Index int
  177. SectionIndex int
  178. Sections []Section
  179. }
  180. // fTokens provides function to handle an ordered list of tokens.
  181. func fTokens() Tokens {
  182. return Tokens{
  183. Index: -1,
  184. }
  185. }
  186. // fToken provides function to encapsulate a number format token.
  187. func fToken(value, tokenType string, parts []Part) Token {
  188. return Token{
  189. TValue: value,
  190. TType: tokenType,
  191. Parts: parts,
  192. }
  193. }
  194. // add provides function to add a token to the end of the list.
  195. func (tk *Tokens) add(value, tokenType string, parts []Part) Token {
  196. token := fToken(value, tokenType, parts)
  197. tk.addRef(token)
  198. return token
  199. }
  200. // addRef provides function to add a token to the end of the list.
  201. func (tk *Tokens) addRef(token Token) {
  202. if len(tk.Sections) <= tk.SectionIndex {
  203. sectionType := []string{TokenSectionPositive, TokenSectionNegative, TokenSectionZero, TokenSectionText}[tk.SectionIndex]
  204. for i := len(tk.Sections) - 1; i < tk.SectionIndex; i++ {
  205. tk.Sections = append(tk.Sections, Section{Type: sectionType})
  206. }
  207. }
  208. tk.Sections[tk.SectionIndex].Items = append(tk.Sections[tk.SectionIndex].Items, token)
  209. }
  210. // reset provides function to reset the index to -1.
  211. func (tk *Tokens) reset() {
  212. tk.Index = -1
  213. }
  214. // Parser inheritable container.
  215. type Parser struct {
  216. InBracket bool
  217. InString bool
  218. InPlaceholder bool
  219. NumFmt string
  220. Offset int
  221. Tokens Tokens
  222. Token Token
  223. }
  224. // NumberFormatParser provides function to parse an Excel number format into a
  225. // stream of tokens.
  226. func NumberFormatParser() Parser {
  227. return Parser{}
  228. }
  229. // EOF provides function to check whether end of tokens stack.
  230. func (ps *Parser) EOF() bool {
  231. return ps.Offset >= len([]rune(ps.NumFmt))
  232. }
  233. // getTokens return a token stream (list).
  234. func (ps *Parser) getTokens() Tokens {
  235. ps.NumFmt = strings.TrimSpace(ps.NumFmt)
  236. // state-dependent character evaluation (order is important)
  237. for !ps.EOF() {
  238. if ps.InBracket {
  239. if ps.Token.TType == TokenTypeCurrencyLanguage {
  240. if ps.currentChar() != Dash && ps.currentChar() != BracketClose {
  241. ps.Token.Parts[1].Token.TValue += ps.currentChar()
  242. }
  243. if ps.currentChar() == Dash {
  244. ps.Token.Parts[0].Token.TValue, ps.Token.Parts[1].Token.TValue = ps.Token.Parts[1].Token.TValue, ps.Token.Parts[0].Token.TValue
  245. }
  246. }
  247. if len(ps.Token.TValue) > 1 && inStrSlice(ConditionOperators, ps.Token.TValue[1:], true) != -1 {
  248. if ps.currentChar() == Dash || strings.ContainsAny(NumCodeChars, ps.currentChar()) {
  249. ps.Token.TType = TokenTypeCondition
  250. ps.Token.Parts = []Part{
  251. {Token: Token{TType: TokenTypeOperator, TValue: ps.Token.TValue[1:]}},
  252. {Token: Token{TType: TokenTypeOperand}},
  253. }
  254. ps.Token.TValue = ""
  255. ps.Token.TValue += ps.currentChar()
  256. ps.Offset++
  257. continue
  258. }
  259. }
  260. if ps.currentChar() == BracketClose {
  261. ps.InBracket = false
  262. if ps.Token.TType == TokenTypeCondition && len(ps.Token.Parts) == 2 {
  263. ps.Token.Parts[1].Token.TValue = ps.Token.TValue
  264. ps.Tokens.add(ps.Token.Parts[0].Token.TValue+ps.Token.Parts[1].Token.TValue, ps.Token.TType, ps.Token.Parts)
  265. ps.Token = Token{}
  266. ps.Offset++
  267. continue
  268. }
  269. ps.Token.TValue += ps.currentChar()
  270. if l := len(ps.Token.TValue); l > 2 {
  271. lit := ps.Token.TValue[1 : l-1]
  272. if idx := inStrSlice(ColorNames, lit, false); idx != -1 {
  273. ps.Tokens.add(lit, TokenTypeColor, nil)
  274. ps.Token = Token{}
  275. ps.Offset++
  276. continue
  277. }
  278. if idx := inStrSlice(GeneralFormattingSwitchArguments, lit, false); idx != -1 {
  279. ps.Tokens.add(ps.Token.TValue, TokenTypeSwitchArgument, nil)
  280. ps.Token = Token{}
  281. ps.Offset++
  282. continue
  283. }
  284. if ps.Token.TType == TokenTypeCurrencyLanguage {
  285. if ps.Token.Parts[0].Token.TValue == "" {
  286. ps.Token.Parts = []Part{{Token: Token{TType: ps.Token.Parts[1].Token.TType, TValue: ps.Token.Parts[1].Token.TValue}}}
  287. }
  288. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  289. ps.Token = Token{}
  290. ps.Offset++
  291. continue
  292. }
  293. ps.Token.TType, ps.Token.TValue = TokenTypeUnknown, lit
  294. isDateTime := true
  295. for _, ch := range lit {
  296. if !strings.ContainsAny(DatesTimesCodeChars, strings.ToUpper(string(ch))) {
  297. isDateTime = false
  298. }
  299. }
  300. if isDateTime {
  301. ps.Token.TType = TokenTypeElapsedDateTimes
  302. }
  303. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  304. ps.Token = Token{}
  305. ps.Offset++
  306. continue
  307. }
  308. }
  309. }
  310. if !ps.InBracket {
  311. if strings.ContainsAny(NumCodeChars, ps.currentChar()) {
  312. if ps.Token.TType == TokenTypeZeroPlaceHolder || ps.Token.TType == TokenTypeDenominator {
  313. ps.Token.TValue += ps.currentChar()
  314. ps.Offset++
  315. continue
  316. }
  317. if ps.Token.TType == TokenTypeFraction {
  318. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  319. ps.Token = Token{TType: TokenTypeDenominator, TValue: ps.currentChar()}
  320. ps.Offset++
  321. continue
  322. }
  323. if ps.Token.TType != "" {
  324. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  325. ps.Token = Token{}
  326. }
  327. ps.Token.TType = TokenTypeZeroPlaceHolder
  328. if ps.currentChar() != Zero {
  329. ps.Token.TType = TokenTypeLiteral
  330. }
  331. ps.Token.TValue += ps.currentChar()
  332. ps.Offset++
  333. continue
  334. }
  335. if ps.currentChar() == Hash {
  336. if ps.Token.TType != TokenTypeHashPlaceHolder && ps.Token.TType != "" {
  337. if ps.Token.TValue == Dot {
  338. ps.Token.TType = TokenTypeDecimalPoint
  339. }
  340. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  341. ps.Token = Token{}
  342. }
  343. ps.Token.TType = TokenTypeHashPlaceHolder
  344. ps.Token.TValue += ps.currentChar()
  345. ps.Offset++
  346. continue
  347. }
  348. if ps.currentChar() == Dot {
  349. if ps.Token.TType == TokenTypeZeroPlaceHolder || ps.Token.TType == TokenTypeHashPlaceHolder {
  350. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  351. ps.Tokens.add(ps.currentChar(), TokenTypeDecimalPoint, ps.Token.Parts)
  352. ps.Token = Token{}
  353. ps.Offset++
  354. continue
  355. }
  356. if !ps.InString {
  357. if ps.Token.TType != "" && strings.ContainsAny(NumCodeChars, ps.nextChar()) {
  358. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  359. ps.Token = Token{}
  360. }
  361. ps.Token.TType = TokenTypeDecimalPoint
  362. }
  363. ps.Token.TValue += ps.currentChar()
  364. ps.Offset++
  365. continue
  366. }
  367. }
  368. if strings.ContainsAny(Dollar+Dash+Plus+ParenOpen+ParenClose+Colon+Whitespace, ps.currentChar()) {
  369. if ps.InBracket {
  370. if len(ps.Token.Parts) == 0 {
  371. ps.Token.Parts = []Part{
  372. {Token: Token{TType: TokenSubTypeCurrencyString}},
  373. {Token: Token{TType: TokenSubTypeLanguageInfo}},
  374. }
  375. }
  376. ps.Token.TValue += ps.currentChar()
  377. ps.Token.TType = TokenTypeCurrencyLanguage
  378. ps.Offset++
  379. continue
  380. }
  381. if ps.Token.TType != TokenTypeLiteral && ps.Token.TType != TokenTypeDateTimes && ps.Token.TType != "" {
  382. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  383. ps.Token = Token{TType: TokenTypeLiteral, TValue: ps.currentChar()}
  384. ps.Offset++
  385. continue
  386. }
  387. if ps.Token.TValue != BackSlash && ps.Token.TType == "" && inStrSlice(AmPm, ps.Token.TValue, false) == -1 {
  388. ps.Token.TType = TokenTypeLiteral
  389. }
  390. if ps.Token.TType == TokenTypeLiteral {
  391. ps.Token.TValue += ps.currentChar()
  392. ps.Offset++
  393. continue
  394. }
  395. }
  396. if ps.currentChar() == Underscore {
  397. ps.Offset += 2
  398. continue
  399. }
  400. if ps.currentChar() == Asterisk {
  401. ps.Tokens.add(ps.nextChar(), TokenTypeRepeatsChar, ps.Token.Parts)
  402. ps.Token = Token{}
  403. ps.Offset += 2
  404. continue
  405. }
  406. if ps.currentChar() == BackSlash {
  407. if ps.Token.TValue != "" {
  408. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  409. ps.Token = Token{}
  410. }
  411. ps.Tokens.add(ps.nextChar(), TokenTypeLiteral, ps.Token.Parts)
  412. ps.Token = Token{}
  413. ps.Offset += 2
  414. continue
  415. }
  416. if ps.currentChar() == Dash {
  417. if ps.Token.TType != "" {
  418. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  419. }
  420. ps.Token.TType = TokenTypeLiteral
  421. if ps.currentChar() != ps.nextChar() {
  422. ps.Tokens.add(ps.currentChar(), ps.Token.TType, ps.Token.Parts)
  423. }
  424. ps.Token = Token{}
  425. ps.Offset++
  426. continue
  427. }
  428. if ps.currentChar() == Comma {
  429. if ps.Token.TType == TokenTypeZeroPlaceHolder || ps.Token.TType == TokenTypeHashPlaceHolder {
  430. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  431. ps.Tokens.add(ps.currentChar(), TokenTypeThousandsSeparator, ps.Token.Parts)
  432. ps.Token = Token{}
  433. ps.Offset++
  434. continue
  435. }
  436. if !ps.InString {
  437. if ps.Token.TType == TokenTypeLiteral {
  438. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  439. ps.Token = Token{TType: TokenTypeThousandsSeparator}
  440. }
  441. if ps.Token.TType == TokenTypeDateTimes {
  442. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  443. ps.Token = Token{TType: TokenTypeLiteral}
  444. }
  445. if ps.currentChar() != ps.nextChar() {
  446. if ps.Token.TType == "" {
  447. ps.Token.TType = TokenTypeLiteral
  448. }
  449. ps.Tokens.add(ps.currentChar(), ps.Token.TType, ps.Token.Parts)
  450. }
  451. ps.Token = Token{}
  452. ps.Offset++
  453. continue
  454. }
  455. ps.Token.TType = TokenTypeLiteral
  456. ps.Token.TValue += ps.currentChar()
  457. ps.Offset++
  458. continue
  459. }
  460. if ps.currentChar() == Whitespace {
  461. if inStrSlice(AmPm, ps.Token.TValue, false) != -1 {
  462. ps.Token.TType = TokenTypeDateTimes
  463. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  464. ps.Token = Token{}
  465. ps.Offset++
  466. continue
  467. }
  468. if ps.Token.TType != "" && ps.Token.TType != TokenTypeLiteral {
  469. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  470. }
  471. ps.Token.TType = TokenTypeLiteral
  472. ps.Tokens.add(ps.currentChar(), ps.Token.TType, ps.Token.Parts)
  473. ps.Token = Token{}
  474. ps.Offset++
  475. continue
  476. }
  477. if ps.currentChar() == Slash {
  478. if ps.Token.TType == TokenTypeDateTimes {
  479. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  480. ps.Tokens.add(ps.currentChar(), TokenTypeLiteral, ps.Token.Parts)
  481. ps.Token = Token{}
  482. ps.Offset++
  483. continue
  484. }
  485. if ps.Token.TType == TokenTypeDigitalPlaceHolder {
  486. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  487. ps.Token = Token{TType: TokenTypeFraction, TValue: ps.currentChar()}
  488. ps.Offset++
  489. continue
  490. }
  491. }
  492. if ps.currentChar() == Colon && ps.Token.TType == TokenTypeDateTimes {
  493. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  494. ps.Tokens.add(ps.currentChar(), TokenTypeLiteral, ps.Token.Parts)
  495. ps.Token = Token{}
  496. ps.Offset++
  497. continue
  498. }
  499. if ps.currentChar() == QuoteDouble {
  500. ps.Offset++
  501. if ps.InString && len(ps.Token.TValue) > 0 {
  502. ps.Tokens.add(ps.Token.TValue, TokenTypeLiteral, ps.Token.Parts)
  503. ps.Token = Token{}
  504. ps.InString = false
  505. continue
  506. }
  507. if ps.Token.TValue != "" {
  508. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  509. }
  510. ps.InString = true
  511. ps.Token = Token{TType: TokenTypeLiteral}
  512. continue
  513. }
  514. if ps.currentChar() == At {
  515. if len(ps.Tokens.Sections) <= ps.Tokens.SectionIndex {
  516. ps.Tokens.Sections = append(ps.Tokens.Sections, Section{Type: TokenSectionText})
  517. }
  518. ps.Tokens.Sections[ps.Tokens.SectionIndex].Type = TokenSectionText
  519. if ps.Token.TType != "" && !ps.InBracket {
  520. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  521. }
  522. ps.Token = Token{TType: TokenTypeTextPlaceHolder, TValue: ps.currentChar()}
  523. ps.Offset++
  524. continue
  525. }
  526. if ps.currentChar() == BracketOpen {
  527. if ps.Token.TType != "" && !ps.InBracket {
  528. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  529. ps.Token = Token{}
  530. }
  531. ps.InBracket = true
  532. ps.Token.TValue += ps.currentChar()
  533. ps.Offset++
  534. continue
  535. }
  536. if ps.currentChar() == Question {
  537. if ps.Token.TType != "" && ps.Token.TType != TokenTypeDigitalPlaceHolder {
  538. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  539. ps.Token = Token{}
  540. }
  541. ps.Token.TType = TokenTypeDigitalPlaceHolder
  542. ps.Token.TValue += ps.currentChar()
  543. ps.Offset++
  544. continue
  545. }
  546. if ps.currentChar() == Percent {
  547. if ps.Token.TType != "" {
  548. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  549. ps.Token = Token{}
  550. }
  551. ps.Token.TType = TokenTypePercent
  552. ps.Token.TValue += ps.currentChar()
  553. ps.Offset++
  554. continue
  555. }
  556. if ps.currentChar() == BlockDelimiter {
  557. sectionTypes := []string{TokenSectionPositive, TokenSectionNegative, TokenSectionZero, TokenSectionText}
  558. if ps.Token.TType != "" {
  559. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  560. }
  561. if len(ps.Tokens.Sections) <= ps.Tokens.SectionIndex {
  562. ps.Tokens.Sections = append(ps.Tokens.Sections, Section{Type: sectionTypes[ps.Tokens.SectionIndex]})
  563. }
  564. ps.Tokens.SectionIndex++
  565. if ps.Tokens.SectionIndex > 3 {
  566. tokens := fTokens()
  567. tokens.reset()
  568. return Tokens{}
  569. }
  570. ps.Token = Token{}
  571. ps.Tokens.Sections = append(ps.Tokens.Sections, Section{Type: sectionTypes[ps.Tokens.SectionIndex]})
  572. ps.Offset++
  573. continue
  574. }
  575. if strings.EqualFold("E+", ps.doubleChar()) {
  576. if ps.Token.TType != "" {
  577. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  578. ps.Token = Token{}
  579. }
  580. ps.Token.TType = TokenTypeExponential
  581. ps.Token.TValue += ps.doubleChar()
  582. ps.Offset += 2
  583. continue
  584. }
  585. if ap, matched := ps.apPattern(); ap != -1 {
  586. ps.Tokens.add(matched, TokenTypeDateTimes, ps.Token.Parts)
  587. ps.Token = Token{}
  588. ps.Offset += len(matched)
  589. continue
  590. }
  591. if general, matched := ps.generalPattern(); general != -1 {
  592. ps.Tokens.add(matched, TokenTypeGeneral, ps.Token.Parts)
  593. ps.Token = Token{}
  594. ps.Offset += len(matched)
  595. continue
  596. }
  597. // token accumulation
  598. if !ps.InBracket && !ps.InString {
  599. if strings.ContainsAny(DatesTimesCodeChars, strings.ToUpper(ps.currentChar())) {
  600. if inStrSlice(AmPm, ps.Token.TValue, false) != -1 {
  601. ps.Token.TType = TokenTypeDateTimes
  602. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  603. ps.Token = Token{}
  604. }
  605. if ps.Token.TType == TokenTypeLiteral || ps.Token.TType == TokenTypeDateTimes && !strings.ContainsAny(ps.Token.TValue, ps.currentChar()) {
  606. ps.Tokens.add(ps.Token.TValue, ps.Token.TType, ps.Token.Parts)
  607. ps.Token = Token{}
  608. }
  609. ps.Token.TType = TokenTypeDateTimes
  610. ps.Token.TValue += ps.currentChar()
  611. ps.Offset++
  612. continue
  613. }
  614. if strings.ContainsAny(DatesTimesCodeChars, strings.ToUpper(ps.Token.TValue)) {
  615. ps.Tokens.add(ps.Token.TValue, TokenTypeDateTimes, ps.Token.Parts)
  616. ps.Token = Token{TType: TokenTypeLiteral, TValue: ps.currentChar()}
  617. ps.Offset++
  618. continue
  619. }
  620. if strings.ContainsAny(DatesTimesCodeChars, strings.ToUpper(ps.nextChar())) {
  621. ps.Token.TValue += ps.currentChar()
  622. ps.Token.TType = TokenTypeLiteral
  623. ps.Offset++
  624. continue
  625. }
  626. if ps.currentChar() == QuoteSingle {
  627. ps.Offset++
  628. continue
  629. }
  630. }
  631. ps.Token.TValue += ps.currentChar()
  632. ps.Offset++
  633. }
  634. // dump remaining accumulation
  635. if len(ps.Token.TValue) > 0 {
  636. tokenType := TokenTypeLiteral
  637. if ps.Token.TType != "" {
  638. tokenType = ps.Token.TType
  639. }
  640. ps.Tokens.add(ps.Token.TValue, tokenType, nil)
  641. }
  642. tokens := fTokens()
  643. tokens.reset()
  644. return ps.Tokens
  645. }
  646. // Parse provides function to parse number format as a token stream (list).
  647. func (ps *Parser) Parse(numFmt string) []Section {
  648. ps.NumFmt = numFmt
  649. ps.Tokens = ps.getTokens()
  650. return ps.Tokens.Sections
  651. }
  652. // doubleChar provides function to get two characters after the current
  653. // position.
  654. func (ps *Parser) doubleChar() string {
  655. if len([]rune(ps.NumFmt)) >= ps.Offset+2 {
  656. return string([]rune(ps.NumFmt)[ps.Offset : ps.Offset+2])
  657. }
  658. return ""
  659. }
  660. // currentChar provides function to get the character of the current position.
  661. func (ps *Parser) currentChar() string {
  662. return string([]rune(ps.NumFmt)[ps.Offset])
  663. }
  664. // nextChar provides function to get the next character of the current
  665. // position.
  666. func (ps *Parser) nextChar() string {
  667. if len([]rune(ps.NumFmt)) >= ps.Offset+2 {
  668. return string([]rune(ps.NumFmt)[ps.Offset+1 : ps.Offset+2])
  669. }
  670. return ""
  671. }
  672. // apPattern infers whether the subsequent characters match the AM/PM pattern,
  673. // it will be returned matched index and result.
  674. func (ps *Parser) apPattern() (int, string) {
  675. for i, pattern := range AmPm {
  676. l := len(pattern)
  677. if len([]rune(ps.NumFmt)) >= ps.Offset+l {
  678. matched := string([]rune(ps.NumFmt)[ps.Offset : ps.Offset+l])
  679. if strings.EqualFold(matched, pattern) {
  680. return i, matched
  681. }
  682. }
  683. }
  684. return -1, ""
  685. }
  686. // generalPattern infers whether the subsequent characters match the
  687. // general pattern, it will be returned matched result and result.
  688. func (ps *Parser) generalPattern() (int, string) {
  689. l := len(TokenTypeGeneral)
  690. if len([]rune(ps.NumFmt)) >= ps.Offset+l {
  691. matched := string([]rune(ps.NumFmt)[ps.Offset : ps.Offset+l])
  692. if strings.EqualFold(matched, TokenTypeGeneral) {
  693. return 0, matched
  694. }
  695. }
  696. return -1, ""
  697. }
  698. // inStrSlice provides a method to check if an element is present in an array,
  699. // and return the index of its location, otherwise return -1.
  700. func inStrSlice(a []string, x string, caseSensitive bool) int {
  701. for idx, n := range a {
  702. if !caseSensitive && strings.EqualFold(x, n) {
  703. return idx
  704. }
  705. if x == n {
  706. return idx
  707. }
  708. }
  709. return -1
  710. }
  711. // PrettyPrint provides function to pretty the parsed result with the indented
  712. // format.
  713. func (ps *Parser) PrettyPrint() string {
  714. indent, output := 0, ""
  715. for _, section := range ps.Tokens.Sections {
  716. output += "<" + section.Type + ">" + "\n"
  717. for _, item := range section.Items {
  718. indent++
  719. for i := 0; i < indent; i++ {
  720. output += "\t"
  721. }
  722. if len(item.Parts) == 0 {
  723. output += item.TValue + " <" + item.TType + ">" + "\n"
  724. } else {
  725. output += "<" + item.TType + ">" + "\n"
  726. }
  727. for _, part := range item.Parts {
  728. indent++
  729. for i := 0; i < indent; i++ {
  730. output += "\t"
  731. }
  732. output += part.Token.TValue + " <" + part.Token.TType + ">" + "\n"
  733. indent--
  734. }
  735. indent--
  736. }
  737. }
  738. return output
  739. }