Source: lib/text/ttml_text_parser.js

  1. /**
  2. * @license
  3. * Copyright 2016 Google Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. goog.provide('shaka.text.TtmlTextParser');
  18. goog.require('goog.asserts');
  19. goog.require('shaka.log');
  20. goog.require('shaka.text.Cue');
  21. goog.require('shaka.text.CueRegion');
  22. goog.require('shaka.text.TextEngine');
  23. goog.require('shaka.util.ArrayUtils');
  24. goog.require('shaka.util.Error');
  25. goog.require('shaka.util.StringUtils');
  26. goog.require('shaka.util.XmlUtils');
/**
 * Text parser for TTML (Timed Text Markup Language) documents.
 *
 * The constructor keeps no state.  The parsing entry points are the prototype
 * methods parseInit() and parseMedia(); the helpers they rely on are static
 * members of this constructor.
 *
 * @constructor
 * @implements {shaka.extern.TextParser}
 * @export
 */
shaka.text.TtmlTextParser = function() {};
/**
 * XML namespace used to read the TTML parameter attributes (frameRate,
 * subFrameRate, frameRateMultiplier and tickRate) from the root <tt> element.
 *
 * @const {string}
 * @private
 */
shaka.text.TtmlTextParser.parameterNs_ = 'http://www.w3.org/ns/ttml#parameter';
/**
 * XML namespace used to read TTML styling attributes (for example extent,
 * origin, color or textAlign) from cue, region and style elements.
 *
 * @const {string}
 * @private
 */
shaka.text.TtmlTextParser.styleNs_ = 'http://www.w3.org/ns/ttml#styling';
/**
 * TTML has no init segments, so this method is not expected to be called.
 * It does nothing except trip an assertion when it is.
 *
 * @override
 * @export
 */
shaka.text.TtmlTextParser.prototype.parseInit = function(data) {
  goog.asserts.assert(false, 'TTML does not have init segments');
};
  50. /**
  51. * @override
  52. * @export
  53. */
  54. shaka.text.TtmlTextParser.prototype.parseMedia = function(data, time) {
  55. const TtmlTextParser = shaka.text.TtmlTextParser;
  56. const XmlUtils = shaka.util.XmlUtils;
  57. const ttpNs = TtmlTextParser.parameterNs_;
  58. const ttsNs = TtmlTextParser.styleNs_;
  59. let str = shaka.util.StringUtils.fromUTF8(data);
  60. let ret = [];
  61. let parser = new DOMParser();
  62. let xml = null;
  63. // dont try to parse empty string as
  64. // DOMParser will not throw error but return an errored xml
  65. if (str == '') {
  66. return ret;
  67. }
  68. try {
  69. xml = parser.parseFromString(str, 'text/xml');
  70. } catch (exception) {
  71. throw new shaka.util.Error(
  72. shaka.util.Error.Severity.CRITICAL,
  73. shaka.util.Error.Category.TEXT,
  74. shaka.util.Error.Code.INVALID_XML,
  75. 'Failed to parse TTML.');
  76. }
  77. if (xml) {
  78. const parserError = xml.getElementsByTagName('parsererror')[0];
  79. if (parserError) {
  80. throw new shaka.util.Error(
  81. shaka.util.Error.Severity.CRITICAL,
  82. shaka.util.Error.Category.TEXT,
  83. shaka.util.Error.Code.INVALID_XML,
  84. parserError.textContent);
  85. }
  86. // Try to get the framerate, subFrameRate and frameRateMultiplier
  87. // if applicable
  88. let frameRate = null;
  89. let subFrameRate = null;
  90. let frameRateMultiplier = null;
  91. let tickRate = null;
  92. let spaceStyle = null;
  93. let extent = null;
  94. let tts = xml.getElementsByTagName('tt');
  95. let tt = tts[0];
  96. // TTML should always have tt element.
  97. if (!tt) {
  98. throw new shaka.util.Error(
  99. shaka.util.Error.Severity.CRITICAL,
  100. shaka.util.Error.Category.TEXT,
  101. shaka.util.Error.Code.INVALID_XML,
  102. 'TTML does not contain <tt> tag.');
  103. } else {
  104. frameRate = XmlUtils.getAttributeNS(tt, ttpNs, 'frameRate');
  105. subFrameRate = XmlUtils.getAttributeNS(tt, ttpNs, 'subFrameRate');
  106. frameRateMultiplier =
  107. XmlUtils.getAttributeNS(tt, ttpNs, 'frameRateMultiplier');
  108. tickRate = XmlUtils.getAttributeNS(tt, ttpNs, 'tickRate');
  109. spaceStyle = tt.getAttribute('xml:space') || 'default';
  110. extent = XmlUtils.getAttributeNS(tt, ttsNs, 'extent');
  111. }
  112. if (spaceStyle != 'default' && spaceStyle != 'preserve') {
  113. throw new shaka.util.Error(
  114. shaka.util.Error.Severity.CRITICAL,
  115. shaka.util.Error.Category.TEXT,
  116. shaka.util.Error.Code.INVALID_XML,
  117. 'Invalid xml:space value: ' + spaceStyle);
  118. }
  119. let whitespaceTrim = spaceStyle == 'default';
  120. let rateInfo = new TtmlTextParser.RateInfo_(
  121. frameRate, subFrameRate, frameRateMultiplier, tickRate);
  122. const metadataElements = TtmlTextParser.getLeafNodes_(
  123. tt.getElementsByTagName('metadata')[0]);
  124. let styles = TtmlTextParser.getLeafNodes_(
  125. tt.getElementsByTagName('styling')[0]);
  126. let regionElements = TtmlTextParser.getLeafNodes_(
  127. tt.getElementsByTagName('layout')[0]);
  128. let cueRegions = [];
  129. for (let i = 0; i < regionElements.length; i++) {
  130. let cueRegion = TtmlTextParser.parseCueRegion_(
  131. regionElements[i], styles, extent);
  132. if (cueRegion) {
  133. cueRegions.push(cueRegion);
  134. }
  135. }
  136. const textNodes = TtmlTextParser.getLeafCues_(
  137. tt.getElementsByTagName('body')[0]);
  138. for (const node of textNodes) {
  139. const cue = TtmlTextParser.parseCue_(
  140. node, time.periodStart, rateInfo, metadataElements, styles,
  141. regionElements, cueRegions, whitespaceTrim, false);
  142. if (cue) {
  143. ret.push(cue);
  144. }
  145. }
  146. }
  147. return ret;
  148. };
/**
 * Matches two space-separated percentages, each between 0% and 100% with an
 * optional fractional part.  The two numbers are captured as groups 1 and 2.
 *
 * @const
 * @private {!RegExp}
 * @example 50.17% 10%
 */
shaka.text.TtmlTextParser.percentValues_ =
    /^(\d{1,2}(?:\.\d+)?|100(?:\.0+)?)% (\d{1,2}(?:\.\d+)?|100(?:\.0+)?)%$/;
/**
 * Matches a single whole-number length expressed in px or em.  Fractional
 * lengths (such as 1.5em) do not match.
 *
 * @const
 * @private {!RegExp}
 * @example 100px
 */
shaka.text.TtmlTextParser.unitValues_ = /^(\d+px|\d+em)$/;
/**
 * Matches two space-separated whole-number pixel lengths.  The two numbers
 * (without the "px" suffix) are captured as groups 1 and 2.
 *
 * @const
 * @private {!RegExp}
 * @example 100px 200px
 */
shaka.text.TtmlTextParser.pixelValues_ = /^(\d+)px (\d+)px$/;
/**
 * Matches a colon-separated clock time with a frames field and an optional
 * sub-frames field after a dot.  Groups 1-4 capture the four colon-separated
 * fields; group 5 captures the optional trailing digits.
 *
 * @const
 * @private {!RegExp}
 * @example 00:00:40:07 (7 frames) or 00:00:40:07.1 (7 frames, 1 subframe)
 */
shaka.text.TtmlTextParser.timeColonFormatFrames_ =
    /^(\d{2,}):(\d{2}):(\d{2}):(\d{2})\.?(\d+)?$/;
/**
 * Matches a colon-separated clock time with whole seconds.  The leading
 * field (group 1) is optional; groups 2 and 3 are always present.
 *
 * @const
 * @private {!RegExp}
 * @example 00:00:40 or 00:40
 */
shaka.text.TtmlTextParser.timeColonFormat_ = /^(?:(\d{2,}):)?(\d{2}):(\d{2})$/;
/**
 * Matches a colon-separated clock time whose last field has a fractional
 * part of at least two digits.  The leading field (group 1) is optional.
 *
 * @const
 * @private {!RegExp}
 * @example 01:02:43.0345555 or 02:43.03
 */
shaka.text.TtmlTextParser.timeColonFormatMilliseconds_ =
    /^(?:(\d{2,}):)?(\d{2}):(\d{2}\.\d{2,})$/;
/**
 * Matches an offset time expressed in frames.  The number (group 1) may be
 * fractional; because every digit is optional, a bare "f" also matches.
 *
 * @const
 * @private {!RegExp}
 * @example 75f or 75.5f
 */
shaka.text.TtmlTextParser.timeFramesFormat_ = /^(\d*(?:\.\d*)?)f$/;
/**
 * Matches an offset time expressed in ticks.  The number (group 1) may be
 * fractional; because every digit is optional, a bare "t" also matches.
 *
 * @const
 * @private {!RegExp}
 * @example 50t or 50.5t
 */
shaka.text.TtmlTextParser.timeTickFormat_ = /^(\d*(?:\.\d*)?)t$/;
/**
 * Matches an offset time built from optional h, m, s and ms components, in
 * that order, captured as groups 1-4.  Every component is optional, so the
 * empty string matches as well.
 *
 * @const
 * @private {!RegExp}
 * @example 3.45h, 3m or 4.20s
 */
shaka.text.TtmlTextParser.timeHMSFormat_ =
    new RegExp(['^(?:(\\d*(?:\\.\\d*)?)h)?',
                '(?:(\\d*(?:\\.\\d*)?)m)?',
                '(?:(\\d*(?:\\.\\d*)?)s)?',
                '(?:(\\d*(?:\\.\\d*)?)ms)?$'].join(''));
/**
 * Maps a TTML textAlign value to the cue lineAlign value applied with it.
 * "left"/"start" map to START, "right"/"end" to END and "center" to CENTER.
 *
 * @const
 * @private {!Object.<string, shaka.text.Cue.lineAlign>}
 */
shaka.text.TtmlTextParser.textAlignToLineAlign_ = {
  'left': shaka.text.Cue.lineAlign.START,
  'center': shaka.text.Cue.lineAlign.CENTER,
  'right': shaka.text.Cue.lineAlign.END,
  'start': shaka.text.Cue.lineAlign.START,
  'end': shaka.text.Cue.lineAlign.END,
};
/**
 * Maps a TTML textAlign value to the cue positionAlign value applied with it.
 * Only "left", "center" and "right" have entries; looking up any other value
 * (including "start" and "end") yields undefined.
 *
 * @const
 * @private {!Object.<string, shaka.text.Cue.positionAlign>}
 */
shaka.text.TtmlTextParser.textAlignToPositionAlign_ = {
  'left': shaka.text.Cue.positionAlign.LEFT,
  'center': shaka.text.Cue.positionAlign.CENTER,
  'right': shaka.text.Cue.positionAlign.RIGHT,
};
  230. /**
  231. * Gets the leaf nodes of the xml node tree. Ignores the text, br elements
  232. * and the spans positioned inside paragraphs
  233. *
  234. * @param {Element} element
  235. * @return {!Array.<!Element>}
  236. * @private
  237. */
  238. shaka.text.TtmlTextParser.getLeafNodes_ = function(element) {
  239. let result = [];
  240. if (!element) {
  241. return result;
  242. }
  243. for (const node of element.childNodes) {
  244. if (
  245. node.nodeType == Node.ELEMENT_NODE &&
  246. node.nodeName !== 'br'
  247. ) {
  248. // Get the leaves the child might contain.
  249. goog.asserts.assert(node instanceof Element,
  250. 'Node should be Element!');
  251. const leafChildren = shaka.text.TtmlTextParser.getLeafNodes_(
  252. /** @type {Element} */(node));
  253. goog.asserts.assert(leafChildren.length > 0,
  254. 'Only a null Element should return no leaves!');
  255. result = result.concat(leafChildren);
  256. }
  257. }
  258. // if no result at this point, the element itself must be a leaf.
  259. if (!result.length) {
  260. result.push(element);
  261. }
  262. return result;
  263. };
  264. /**
  265. * Get the leaf nodes that can act as cues
  266. * (at least begin attribute)
  267. *
  268. * @param {Element} element
  269. * @return {!Array.<!Element>}
  270. * @private
  271. */
  272. shaka.text.TtmlTextParser.getLeafCues_ = function(element) {
  273. if (!element) {
  274. return [];
  275. }
  276. let ret = [];
  277. // Recursively find any child elements that have a 'begin' attribute.
  278. for (const child of element.childNodes) {
  279. if (child instanceof Element) {
  280. if (child.hasAttribute('begin')) {
  281. ret.push(child);
  282. } else {
  283. ret = ret.concat(shaka.text.TtmlTextParser.getLeafCues_(child));
  284. }
  285. }
  286. }
  287. return ret;
  288. };
  289. /**
  290. * Trims and removes multiple spaces from a string
  291. *
  292. * @param {Element} element
  293. * @param {boolean} whitespaceTrim
  294. * @return {string}
  295. * @private
  296. */
  297. shaka.text.TtmlTextParser.sanitizeTextContent = function(
  298. element, whitespaceTrim) {
  299. let payload = '';
  300. for (const node of element.childNodes) {
  301. if (node.nodeName == 'br' && element.childNodes[0] !== node) {
  302. payload += '\n';
  303. } else if (node.childNodes && node.childNodes.length > 0) {
  304. payload += shaka.text.TtmlTextParser.sanitizeTextContent(
  305. /** @type {!Element} */ (node),
  306. whitespaceTrim
  307. );
  308. } else if (whitespaceTrim) {
  309. // Trim leading and trailing whitespace.
  310. let trimmed = node.textContent.trim();
  311. // Collapse multiple spaces into one.
  312. trimmed = trimmed.replace(/\s+/g, ' ');
  313. payload += trimmed;
  314. } else {
  315. payload += node.textContent;
  316. }
  317. }
  318. return payload;
  319. };
  320. /**
  321. * Parses an Element into a TextTrackCue or VTTCue.
  322. *
  323. * @param {!Element} cueElement
  324. * @param {number} offset
  325. * @param {!shaka.text.TtmlTextParser.RateInfo_} rateInfo
  326. * @param {!Array.<!Element>} metadataElements
  327. * @param {!Array.<!Element>} styles
  328. * @param {!Array.<!Element>} regionElements
  329. * @param {!Array.<!shaka.text.CueRegion>} cueRegions
  330. * @param {boolean} whitespaceTrim
  331. * @param {boolean} isNested
  332. * @return {shaka.text.Cue}
  333. * @private
  334. */
  335. shaka.text.TtmlTextParser.parseCue_ = function(
  336. cueElement, offset, rateInfo, metadataElements, styles, regionElements,
  337. cueRegions, whitespaceTrim, isNested) {
  338. if (isNested && cueElement.nodeName == 'br') {
  339. const cue = new shaka.text.Cue(0, 0, '');
  340. cue.spacer = true;
  341. return cue;
  342. }
  343. const isTextContentEmpty = /^[\s\n]*$/.test(cueElement.textContent);
  344. const hasNoTimeAttributes = cueElement.nodeType == Node.ELEMENT_NODE &&
  345. !cueElement.hasAttribute('begin') &&
  346. !cueElement.hasAttribute('end');
  347. if (
  348. cueElement.nodeType != Node.ELEMENT_NODE ||
  349. /* Disregards empty elements without time attributes nor content
  350. * <p begin="..." smpte:backgroundImage="..." /> will go through,
  351. * as some information could be holded by its attributes
  352. * <p />, <div></div> won't,
  353. * as they don't have means to be displayed into a playback sequence
  354. */
  355. (hasNoTimeAttributes && isTextContentEmpty) ||
  356. /*
  357. * Let nested cue without time attributes through:
  358. * time attributes are holded by its parent
  359. */
  360. (hasNoTimeAttributes && !isNested)
  361. ) {
  362. return null;
  363. }
  364. const spaceStyle = cueElement.getAttribute('xml:space') ||
  365. (whitespaceTrim ? 'default' : 'preserve');
  366. const localWhitespaceTrim = spaceStyle == 'default';
  367. // Get time.
  368. let start = shaka.text.TtmlTextParser.parseTime_(
  369. cueElement.getAttribute('begin'), rateInfo);
  370. let end = shaka.text.TtmlTextParser.parseTime_(
  371. cueElement.getAttribute('end'), rateInfo);
  372. let duration = shaka.text.TtmlTextParser.parseTime_(
  373. cueElement.getAttribute('dur'), rateInfo);
  374. if (end == null && duration != null) {
  375. end = start + duration;
  376. }
  377. if (!isNested && (start == null || end == null)) {
  378. throw new shaka.util.Error(
  379. shaka.util.Error.Severity.CRITICAL,
  380. shaka.util.Error.Category.TEXT,
  381. shaka.util.Error.Code.INVALID_TEXT_CUE);
  382. }
  383. if (isNested && start == null) {
  384. start = 0;
  385. } else {
  386. start += offset;
  387. }
  388. if (isNested && end == null) {
  389. end = 0;
  390. } else {
  391. end += offset;
  392. }
  393. let payload = '';
  394. const nestedCues = [];
  395. // If one of the children is a text node with something other than whitespace
  396. // in it, stop going down and write the payload.
  397. if (
  398. Array.from(cueElement.childNodes).find(
  399. (childNode) => childNode.nodeType === Node.TEXT_NODE &&
  400. /\S+/.test(childNode.textContent)
  401. )
  402. ) {
  403. payload = shaka.text.TtmlTextParser.sanitizeTextContent(
  404. cueElement,
  405. localWhitespaceTrim);
  406. } else {
  407. for (const childNode of cueElement.childNodes) {
  408. const nestedCue = shaka.text.TtmlTextParser.parseCue_(
  409. /** @type {!Element} */ (childNode),
  410. offset,
  411. rateInfo,
  412. metadataElements,
  413. styles,
  414. regionElements,
  415. cueRegions,
  416. localWhitespaceTrim,
  417. /* isNested */ true);
  418. if (nestedCue) {
  419. // Set the start time and end time for the nested cues.
  420. nestedCue.startTime = nestedCue.startTime || start;
  421. nestedCue.endTime = nestedCue.endTime || end;
  422. nestedCues.push(nestedCue);
  423. }
  424. }
  425. }
  426. const cue = new shaka.text.Cue(start, end, payload);
  427. cue.nestedCues = nestedCues;
  428. // Get other properties if available.
  429. const regionElement = shaka.text.TtmlTextParser.getElementsFromCollection_(
  430. cueElement, 'region', regionElements, /* prefix= */ '')[0];
  431. if (regionElement && regionElement.getAttribute('xml:id')) {
  432. const regionId = regionElement.getAttribute('xml:id');
  433. cue.region = cueRegions.filter((region) => region.id == regionId)[0];
  434. }
  435. const imageElement = shaka.text.TtmlTextParser.getElementsFromCollection_(
  436. cueElement, 'backgroundImage', metadataElements, '#',
  437. shaka.text.TtmlTextParser.smpteNs_)[0];
  438. shaka.text.TtmlTextParser.addStyle_(
  439. cue,
  440. cueElement,
  441. regionElement,
  442. imageElement,
  443. styles);
  444. return cue;
  445. };
  446. /**
  447. * Parses an Element into a TextTrackCue or VTTCue.
  448. *
  449. * @param {!Element} regionElement
  450. * @param {!Array.<!Element>} styles Defined in the top of tt element and
  451. * used principally for images.
  452. * @param {?string} globalExtent
  453. * @return {shaka.text.CueRegion}
  454. * @private
  455. */
  456. shaka.text.TtmlTextParser.parseCueRegion_ = function(regionElement, styles,
  457. globalExtent) {
  458. const TtmlTextParser = shaka.text.TtmlTextParser;
  459. let region = new shaka.text.CueRegion();
  460. let id = regionElement.getAttribute('xml:id');
  461. if (!id) {
  462. shaka.log.warning('TtmlTextParser parser encountered a region with ' +
  463. 'no id. Region will be ignored.');
  464. return null;
  465. }
  466. region.id = id;
  467. let globalResults = null;
  468. if (globalExtent) {
  469. globalResults = TtmlTextParser.percentValues_.exec(globalExtent) ||
  470. TtmlTextParser.pixelValues_.exec(globalExtent);
  471. }
  472. const globalWidth = globalResults ? Number(globalResults[1]) : null;
  473. const globalHeight = globalResults ? Number(globalResults[2]) : null;
  474. let results = null;
  475. let percentage = null;
  476. let extent = TtmlTextParser.getStyleAttributeFromRegion_(
  477. regionElement, styles, 'extent');
  478. if (extent) {
  479. percentage = TtmlTextParser.percentValues_.exec(extent);
  480. results = percentage || TtmlTextParser.pixelValues_.exec(extent);
  481. if (results != null) {
  482. if (globalWidth != null) {
  483. region.width = Number(results[1]) * 100 / globalWidth;
  484. } else {
  485. region.width = Number(results[1]);
  486. }
  487. if (globalHeight != null) {
  488. region.height = Number(results[2]) * 100 / globalHeight;
  489. } else {
  490. region.height = Number(results[2]);
  491. }
  492. region.widthUnits = percentage || globalWidth != null ?
  493. shaka.text.CueRegion.units.PERCENTAGE :
  494. shaka.text.CueRegion.units.PX;
  495. region.heightUnits = percentage || globalHeight != null ?
  496. shaka.text.CueRegion.units.PERCENTAGE :
  497. shaka.text.CueRegion.units.PX;
  498. }
  499. }
  500. let origin = TtmlTextParser.getStyleAttributeFromRegion_(
  501. regionElement, styles, 'origin');
  502. if (origin) {
  503. percentage = TtmlTextParser.percentValues_.exec(origin);
  504. results = percentage || TtmlTextParser.pixelValues_.exec(origin);
  505. if (results != null) {
  506. if (globalWidth != null) {
  507. region.viewportAnchorX = Number(results[1]) * 100 / globalWidth;
  508. } else {
  509. region.viewportAnchorX = Number(results[1]);
  510. }
  511. if (globalHeight != null) {
  512. region.viewportAnchorY = Number(results[2]) * 100 / globalHeight;
  513. } else {
  514. region.viewportAnchorY = Number(results[2]);
  515. }
  516. region.viewportAnchorUnits = percentage || globalWidth != null ?
  517. shaka.text.CueRegion.units.PERCENTAGE :
  518. shaka.text.CueRegion.units.PX;
  519. }
  520. }
  521. return region;
  522. };
  523. /**
  524. * Adds applicable style properties to a cue.
  525. *
  526. * @param {!shaka.text.Cue} cue
  527. * @param {!Element} cueElement
  528. * @param {Element} region
  529. * @param {Element} imageElement
  530. * @param {!Array.<!Element>} styles
  531. * @private
  532. */
  533. shaka.text.TtmlTextParser.addStyle_ = function(
  534. cue, cueElement, region, imageElement, styles) {
  535. const TtmlTextParser = shaka.text.TtmlTextParser;
  536. const Cue = shaka.text.Cue;
  537. let direction = TtmlTextParser.getStyleAttribute_(
  538. cueElement, region, styles, 'direction');
  539. if (direction == 'rtl') {
  540. cue.direction = Cue.direction.HORIZONTAL_RIGHT_TO_LEFT;
  541. }
  542. // Direction attribute specifies one-dimentional writing direction
  543. // (left to right or right to left). Writing mode specifies that
  544. // plus whether text is vertical or horizontal.
  545. // They should not contradict each other. If they do, we give
  546. // preference to writing mode.
  547. let writingMode = TtmlTextParser.getStyleAttribute_(
  548. cueElement, region, styles, 'writingMode');
  549. // Set cue's direction if the text is horizontal, and cue's writingMode if
  550. // it's vertical.
  551. if (writingMode == 'tb' || writingMode == 'tblr') {
  552. cue.writingMode = Cue.writingMode.VERTICAL_LEFT_TO_RIGHT;
  553. } else if (writingMode == 'tbrl') {
  554. cue.writingMode = Cue.writingMode.VERTICAL_RIGHT_TO_LEFT;
  555. } else if (writingMode == 'rltb' || writingMode == 'rl') {
  556. cue.direction = Cue.direction.HORIZONTAL_RIGHT_TO_LEFT;
  557. } else if (writingMode) {
  558. cue.direction = Cue.direction.HORIZONTAL_LEFT_TO_RIGHT;
  559. }
  560. let align = TtmlTextParser.getStyleAttribute_(
  561. cueElement, region, styles, 'textAlign');
  562. if (align) {
  563. cue.positionAlign = TtmlTextParser.textAlignToPositionAlign_[align];
  564. cue.lineAlign = TtmlTextParser.textAlignToLineAlign_[align];
  565. goog.asserts.assert(align.toUpperCase() in Cue.textAlign,
  566. align.toUpperCase() +
  567. ' Should be in Cue.textAlign values!');
  568. cue.textAlign = Cue.textAlign[align.toUpperCase()];
  569. } else {
  570. // Default value is START: https://bit.ly/32OGmvo
  571. cue.textAlign = Cue.textAlign.START;
  572. }
  573. let displayAlign = TtmlTextParser.getStyleAttribute_(
  574. cueElement, region, styles, 'displayAlign');
  575. if (displayAlign) {
  576. goog.asserts.assert(displayAlign.toUpperCase() in Cue.displayAlign,
  577. displayAlign.toUpperCase() +
  578. ' Should be in Cue.displayAlign values!');
  579. cue.displayAlign = Cue.displayAlign[displayAlign.toUpperCase()];
  580. }
  581. let color = TtmlTextParser.getStyleAttribute_(
  582. cueElement, region, styles, 'color');
  583. if (color) {
  584. cue.color = color;
  585. }
  586. let backgroundColor = TtmlTextParser.getStyleAttribute_(
  587. cueElement, region, styles, 'backgroundColor');
  588. if (backgroundColor) {
  589. cue.backgroundColor = backgroundColor;
  590. }
  591. let fontFamily = TtmlTextParser.getStyleAttribute_(
  592. cueElement, region, styles, 'fontFamily');
  593. if (fontFamily) {
  594. cue.fontFamily = fontFamily;
  595. }
  596. let fontWeight = TtmlTextParser.getStyleAttribute_(
  597. cueElement, region, styles, 'fontWeight');
  598. if (fontWeight && fontWeight == 'bold') {
  599. cue.fontWeight = Cue.fontWeight.BOLD;
  600. }
  601. let wrapOption = TtmlTextParser.getStyleAttribute_(
  602. cueElement, region, styles, 'wrapOption');
  603. if (wrapOption && wrapOption == 'noWrap') {
  604. cue.wrapLine = false;
  605. }
  606. let lineHeight = TtmlTextParser.getStyleAttribute_(
  607. cueElement, region, styles, 'lineHeight');
  608. if (lineHeight && lineHeight.match(TtmlTextParser.unitValues_)) {
  609. cue.lineHeight = lineHeight;
  610. }
  611. let fontSize = TtmlTextParser.getStyleAttribute_(
  612. cueElement, region, styles, 'fontSize');
  613. if (fontSize && fontSize.match(TtmlTextParser.unitValues_)) {
  614. cue.fontSize = fontSize;
  615. }
  616. let fontStyle = TtmlTextParser.getStyleAttribute_(
  617. cueElement, region, styles, 'fontStyle');
  618. if (fontStyle) {
  619. goog.asserts.assert(fontStyle.toUpperCase() in Cue.fontStyle,
  620. fontStyle.toUpperCase() +
  621. ' Should be in Cue.fontStyle values!');
  622. cue.fontStyle = Cue.fontStyle[fontStyle.toUpperCase()];
  623. }
  624. if (imageElement) {
  625. // According to the spec, we should use imageType (camelCase), but
  626. // historically we have checked for imagetype (lowercase).
  627. // This was the case since background image support was first introduced
  628. // in PR #1859, in April 2019, and first released in v2.5.0.
  629. // Now we check for both, although only imageType (camelCase) is to spec.
  630. const backgroundImageType =
  631. imageElement.getAttribute('imageType') ||
  632. imageElement.getAttribute('imagetype');
  633. const backgroundImageEncoding = imageElement.getAttribute('encoding');
  634. const backgroundImageData = imageElement.textContent.trim();
  635. if (backgroundImageType == 'PNG' &&
  636. backgroundImageEncoding == 'Base64' &&
  637. backgroundImageData) {
  638. cue.backgroundImage = 'data:image/png;base64,' + backgroundImageData;
  639. }
  640. }
  641. // Text decoration is an array of values which can come both from the
  642. // element's style or be inherited from elements' parent nodes. All of those
  643. // values should be applied as long as they don't contradict each other. If
  644. // they do, elements' own style gets preference.
  645. let textDecorationRegion = TtmlTextParser.getStyleAttributeFromRegion_(
  646. region, styles, 'textDecoration');
  647. if (textDecorationRegion) {
  648. TtmlTextParser.addTextDecoration_(cue, textDecorationRegion);
  649. }
  650. let textDecorationElement = TtmlTextParser.getStyleAttributeFromElement_(
  651. cueElement, styles, 'textDecoration');
  652. if (textDecorationElement) {
  653. TtmlTextParser.addTextDecoration_(cue, textDecorationElement);
  654. }
  655. };
  656. /**
  657. * Parses text decoration values and adds/removes them to/from the cue.
  658. *
  659. * @param {!shaka.text.Cue} cue
  660. * @param {string} decoration
  661. * @private
  662. */
  663. shaka.text.TtmlTextParser.addTextDecoration_ = function(cue, decoration) {
  664. const Cue = shaka.text.Cue;
  665. let values = decoration.split(' ');
  666. for (let i = 0; i < values.length; i++) {
  667. switch (values[i]) {
  668. case 'underline':
  669. if (!cue.textDecoration.includes(Cue.textDecoration.UNDERLINE)) {
  670. cue.textDecoration.push(Cue.textDecoration.UNDERLINE);
  671. }
  672. break;
  673. case 'noUnderline':
  674. if (cue.textDecoration.includes(Cue.textDecoration.UNDERLINE)) {
  675. shaka.util.ArrayUtils.remove(cue.textDecoration,
  676. Cue.textDecoration.UNDERLINE);
  677. }
  678. break;
  679. case 'lineThrough':
  680. if (!cue.textDecoration.includes(Cue.textDecoration.LINE_THROUGH)) {
  681. cue.textDecoration.push(Cue.textDecoration.LINE_THROUGH);
  682. }
  683. break;
  684. case 'noLineThrough':
  685. if (cue.textDecoration.includes(Cue.textDecoration.LINE_THROUGH)) {
  686. shaka.util.ArrayUtils.remove(cue.textDecoration,
  687. Cue.textDecoration.LINE_THROUGH);
  688. }
  689. break;
  690. case 'overline':
  691. if (!cue.textDecoration.includes(Cue.textDecoration.OVERLINE)) {
  692. cue.textDecoration.push(Cue.textDecoration.OVERLINE);
  693. }
  694. break;
  695. case 'noOverline':
  696. if (cue.textDecoration.includes(Cue.textDecoration.OVERLINE)) {
  697. shaka.util.ArrayUtils.remove(cue.textDecoration,
  698. Cue.textDecoration.OVERLINE);
  699. }
  700. break;
  701. }
  702. }
  703. };
  704. /**
  705. * Finds a specified attribute on either the original cue element or its
  706. * associated region and returns the value if the attribute was found.
  707. *
  708. * @param {!Element} cueElement
  709. * @param {Element} region
  710. * @param {!Array.<!Element>} styles
  711. * @param {string} attribute
  712. * @return {?string}
  713. * @private
  714. */
  715. shaka.text.TtmlTextParser.getStyleAttribute_ = function(
  716. cueElement, region, styles, attribute) {
  717. // An attribute can be specified on region level or in a styling block
  718. // associated with the region or original element.
  719. const TtmlTextParser = shaka.text.TtmlTextParser;
  720. let attr = TtmlTextParser.getStyleAttributeFromElement_(
  721. cueElement, styles, attribute);
  722. if (attr) {
  723. return attr;
  724. }
  725. return TtmlTextParser.getStyleAttributeFromRegion_(
  726. region, styles, attribute);
  727. };
  728. /**
  729. * Finds a specified attribute on the element's associated region
  730. * and returns the value if the attribute was found.
  731. *
  732. * @param {Element} region
  733. * @param {!Array.<!Element>} styles
  734. * @param {string} attribute
  735. * @return {?string}
  736. * @private
  737. */
  738. shaka.text.TtmlTextParser.getStyleAttributeFromRegion_ = function(
  739. region, styles, attribute) {
  740. const XmlUtils = shaka.util.XmlUtils;
  741. const ttsNs = shaka.text.TtmlTextParser.styleNs_;
  742. if (!region) {
  743. return null;
  744. }
  745. let regionChildren = shaka.text.TtmlTextParser.getLeafNodes_(region);
  746. for (let i = 0; i < regionChildren.length; i++) {
  747. let attr = XmlUtils.getAttributeNS(regionChildren[i], ttsNs, attribute);
  748. if (attr) {
  749. return attr;
  750. }
  751. }
  752. return shaka.text.TtmlTextParser.getInheritedStyleAttribute_(
  753. region, styles, attribute);
  754. };
  755. /**
  756. * Finds a specified attribute on the cue element and returns the value
  757. * if the attribute was found.
  758. *
  759. * @param {!Element} cueElement
  760. * @param {!Array.<!Element>} styles
  761. * @param {string} attribute
  762. * @return {?string}
  763. * @private
  764. */
  765. shaka.text.TtmlTextParser.getStyleAttributeFromElement_ = function(
  766. cueElement, styles, attribute) {
  767. const XmlUtils = shaka.util.XmlUtils;
  768. const ttsNs = shaka.text.TtmlTextParser.styleNs_;
  769. // Styling on elements should take precedence
  770. // over the main styling attributes
  771. const elementAttribute = XmlUtils.getAttributeNS(
  772. cueElement,
  773. ttsNs,
  774. attribute);
  775. if (elementAttribute) {
  776. return elementAttribute;
  777. }
  778. return shaka.text.TtmlTextParser.getInheritedStyleAttribute_(
  779. cueElement, styles, attribute);
  780. };
  781. /**
  782. * Finds a specified attribute on an element's styles and the styles those
  783. * styles inherit from.
  784. *
  785. * @param {!Element} element
  786. * @param {!Array.<!Element>} styles
  787. * @param {string} attribute
  788. * @return {?string}
  789. * @private
  790. */
  791. shaka.text.TtmlTextParser.getInheritedStyleAttribute_ = function(
  792. element, styles, attribute) {
  793. const XmlUtils = shaka.util.XmlUtils;
  794. const ttsNs = shaka.text.TtmlTextParser.styleNs_;
  795. const inheritedStyles = shaka.text.TtmlTextParser.getElementsFromCollection_(
  796. element, 'style', styles, /* prefix= */ '');
  797. let styleValue = null;
  798. // The last value in our styles stack takes the precedence over the others
  799. for (let i = 0; i < inheritedStyles.length; i++) {
  800. let styleAttributeValue = XmlUtils.getAttributeNS(
  801. inheritedStyles[i],
  802. ttsNs,
  803. attribute);
  804. if (!styleAttributeValue) {
  805. // Styles can inherit from other styles, so traverse up that chain.
  806. styleAttributeValue =
  807. shaka.text.TtmlTextParser.getStyleAttributeFromElement_(
  808. inheritedStyles[i], styles, attribute);
  809. }
  810. if (styleAttributeValue) {
  811. styleValue = styleAttributeValue;
  812. }
  813. }
  814. return styleValue;
  815. };
  816. /**
  817. * Selects items from |collection| whose id matches |attributeName|
  818. * from |element|.
  819. *
  820. * @param {Element} element
  821. * @param {string} attributeName
  822. * @param {!Array.<Element>} collection
  823. * @param {string} prefixName
  824. * @param {string=} nsName
  825. * @return {!Array.<!Element>}
  826. * @private
  827. */
  828. shaka.text.TtmlTextParser.getElementsFromCollection_ = function(
  829. element, attributeName, collection, prefixName, nsName) {
  830. const items = [];
  831. if (!element || collection.length < 1) {
  832. return items;
  833. }
  834. const attributeValue = shaka.text.TtmlTextParser.getInheritedAttribute_(
  835. element, attributeName, nsName);
  836. if (attributeValue) {
  837. // There could be multiple items in one attribute
  838. // <span style="style1 style2">A cue</span>
  839. const itemNames = attributeValue.split(' ');
  840. for (const name of itemNames) {
  841. for (const item of collection) {
  842. if ((prefixName + item.getAttribute('xml:id')) == name) {
  843. items.push(item);
  844. break;
  845. }
  846. }
  847. }
  848. }
  849. return items;
  850. };
  851. /**
  852. * Traverses upwards from a given node until a given attribute is found.
  853. *
  854. * @param {!Element} element
  855. * @param {string} attributeName
  856. * @param {string=} nsName
  857. * @return {?string}
  858. * @private
  859. */
  860. shaka.text.TtmlTextParser.getInheritedAttribute_ = function(
  861. element, attributeName, nsName) {
  862. let ret = null;
  863. const XmlUtils = shaka.util.XmlUtils;
  864. while (element) {
  865. ret = nsName ? XmlUtils.getAttributeNS(element, nsName, attributeName)
  866. : element.getAttribute(attributeName);
  867. if (ret) {
  868. break;
  869. }
  870. // Element.parentNode can lead to XMLDocument, which is not an Element and
  871. // has no getAttribute().
  872. let parentNode = element.parentNode;
  873. if (parentNode instanceof Element) {
  874. element = parentNode;
  875. } else {
  876. break;
  877. }
  878. }
  879. return ret;
  880. };
  881. /**
  882. * Parses a TTML time from the given word.
  883. *
  884. * @param {string} text
  885. * @param {!shaka.text.TtmlTextParser.RateInfo_} rateInfo
  886. * @return {?number}
  887. * @private
  888. */
  889. shaka.text.TtmlTextParser.parseTime_ = function(text, rateInfo) {
  890. let ret = null;
  891. const TtmlTextParser = shaka.text.TtmlTextParser;
  892. if (TtmlTextParser.timeColonFormatFrames_.test(text)) {
  893. ret = TtmlTextParser.parseColonTimeWithFrames_(rateInfo, text);
  894. } else if (TtmlTextParser.timeColonFormat_.test(text)) {
  895. ret = TtmlTextParser.parseTimeFromRegex_(
  896. TtmlTextParser.timeColonFormat_, text);
  897. } else if (TtmlTextParser.timeColonFormatMilliseconds_.test(text)) {
  898. ret = TtmlTextParser.parseTimeFromRegex_(
  899. TtmlTextParser.timeColonFormatMilliseconds_, text);
  900. } else if (TtmlTextParser.timeFramesFormat_.test(text)) {
  901. ret = TtmlTextParser.parseFramesTime_(rateInfo, text);
  902. } else if (TtmlTextParser.timeTickFormat_.test(text)) {
  903. ret = TtmlTextParser.parseTickTime_(rateInfo, text);
  904. } else if (TtmlTextParser.timeHMSFormat_.test(text)) {
  905. ret = TtmlTextParser.parseTimeFromRegex_(
  906. TtmlTextParser.timeHMSFormat_, text);
  907. }
  908. return ret;
  909. };
  910. /**
  911. * Parses a TTML time in frame format.
  912. *
  913. * @param {!shaka.text.TtmlTextParser.RateInfo_} rateInfo
  914. * @param {string} text
  915. * @return {?number}
  916. * @private
  917. */
  918. shaka.text.TtmlTextParser.parseFramesTime_ = function(rateInfo, text) {
  919. // 75f or 75.5f
  920. let results = shaka.text.TtmlTextParser.timeFramesFormat_.exec(text);
  921. let frames = Number(results[1]);
  922. return frames / rateInfo.frameRate;
  923. };
  924. /**
  925. * Parses a TTML time in tick format.
  926. *
  927. * @param {!shaka.text.TtmlTextParser.RateInfo_} rateInfo
  928. * @param {string} text
  929. * @return {?number}
  930. * @private
  931. */
  932. shaka.text.TtmlTextParser.parseTickTime_ = function(rateInfo, text) {
  933. // 50t or 50.5t
  934. let results = shaka.text.TtmlTextParser.timeTickFormat_.exec(text);
  935. let ticks = Number(results[1]);
  936. return ticks / rateInfo.tickRate;
  937. };
  938. /**
  939. * Parses a TTML colon formatted time containing frames.
  940. *
  941. * @param {!shaka.text.TtmlTextParser.RateInfo_} rateInfo
  942. * @param {string} text
  943. * @return {?number}
  944. * @private
  945. */
  946. shaka.text.TtmlTextParser.parseColonTimeWithFrames_ = function(
  947. rateInfo, text) {
  948. // 01:02:43:07 ('07' is frames) or 01:02:43:07.1 (subframes)
  949. let results = shaka.text.TtmlTextParser.timeColonFormatFrames_.exec(text);
  950. let hours = Number(results[1]);
  951. let minutes = Number(results[2]);
  952. let seconds = Number(results[3]);
  953. let frames = Number(results[4]);
  954. let subframes = Number(results[5]) || 0;
  955. frames += subframes / rateInfo.subFrameRate;
  956. seconds += frames / rateInfo.frameRate;
  957. return seconds + (minutes * 60) + (hours * 3600);
  958. };
  959. /**
  960. * Parses a TTML time with a given regex. Expects regex to be some
  961. * sort of a time-matcher to match hours, minutes, seconds and milliseconds
  962. *
  963. * @param {!RegExp} regex
  964. * @param {string} text
  965. * @return {?number}
  966. * @private
  967. */
  968. shaka.text.TtmlTextParser.parseTimeFromRegex_ = function(regex, text) {
  969. let results = regex.exec(text);
  970. if (results == null || results[0] == '') {
  971. return null;
  972. }
  973. // This capture is optional, but will still be in the array as undefined,
  974. // in which case it is 0.
  975. let hours = Number(results[1]) || 0;
  976. let minutes = Number(results[2]) || 0;
  977. let seconds = Number(results[3]) || 0;
  978. let miliseconds = Number(results[4]) || 0;
  979. return (miliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
  980. };
  981. /**
  982. * Contains information about frame/subframe rate
  983. * and frame rate multiplier for time in frame format.
  984. *
  985. * @example 01:02:03:04(4 frames) or 01:02:03:04.1(4 frames, 1 subframe)
  986. * @param {?string} frameRate
  987. * @param {?string} subFrameRate
  988. * @param {?string} frameRateMultiplier
  989. * @param {?string} tickRate
  990. * @constructor
  991. * @struct
  992. * @private
  993. */
  994. shaka.text.TtmlTextParser.RateInfo_ = function(
  995. frameRate, subFrameRate, frameRateMultiplier, tickRate) {
  996. /**
  997. * @type {number}
  998. */
  999. this.frameRate = Number(frameRate) || 30;
  1000. /**
  1001. * @type {number}
  1002. */
  1003. this.subFrameRate = Number(subFrameRate) || 1;
  1004. /**
  1005. * @type {number}
  1006. */
  1007. this.tickRate = Number(tickRate);
  1008. if (this.tickRate == 0) {
  1009. if (frameRate) {
  1010. this.tickRate = this.frameRate * this.subFrameRate;
  1011. } else {
  1012. this.tickRate = 1;
  1013. }
  1014. }
  1015. if (frameRateMultiplier) {
  1016. const multiplierResults = /^(\d+) (\d+)$/g.exec(frameRateMultiplier);
  1017. if (multiplierResults) {
  1018. const numerator = Number(multiplierResults[1]);
  1019. const denominator = Number(multiplierResults[2]);
  1020. const multiplierNum = numerator / denominator;
  1021. this.frameRate *= multiplierNum;
  1022. }
  1023. }
  1024. };
/**
 * The namespace URL for SMPTE fields, i.e. the namespace under which
 * SMPTE-TT specific attributes and elements are looked up.
 * NOTE(review): no use of this constant is visible in this part of the
 * file; confirm the exact fields read against the parsing code.
 * @const {string}
 * @private
 */
shaka.text.TtmlTextParser.smpteNs_ =
    'http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt';
// Register this parser with TextEngine under the 'application/ttml+xml'
// MIME type.  This runs as a side effect of loading the file.
shaka.text.TextEngine.registerParser(
    'application/ttml+xml',
    shaka.text.TtmlTextParser);