nficano committed on
Commit
549e8af
·
unverified ·
2 Parent(s): d3d1869 ec0d6b8

Merge pull request #1520 from fischcode/Fix-parser-py

Browse files
Files changed (1) hide show
  1. pytube/parser.py +12 -5
pytube/parser.py CHANGED
@@ -74,18 +74,23 @@ def find_object_from_startpoint(html, start_point):
74
 
75
  # First letter MUST be a open brace, so we put that in the stack,
76
  # and skip the first character.
 
 
77
  stack = [html[0]]
78
  i = 1
79
 
80
  context_closers = {
81
  '{': '}',
82
  '[': ']',
83
- '"': '"'
 
84
  }
85
 
86
  while i < len(html):
87
  if len(stack) == 0:
88
  break
 
 
89
  curr_char = html[i]
90
  curr_context = stack[-1]
91
 
@@ -95,17 +100,19 @@ def find_object_from_startpoint(html, start_point):
95
  i += 1
96
  continue
97
 
98
- # Strings require special context handling because they can contain
99
  # context openers *and* closers
100
- if curr_context == '"':
101
- # If there's a backslash in a string, we skip a character
102
  if curr_char == '\\':
103
  i += 2
104
  continue
105
  else:
106
  # Non-string contexts are when we need to look for context openers.
107
  if curr_char in context_closers.keys():
108
- stack.append(curr_char)
 
 
109
 
110
  i += 1
111
 
 
74
 
75
  # First letter MUST be a open brace, so we put that in the stack,
76
  # and skip the first character.
77
+ last_char = '{'
78
+ curr_char = None
79
  stack = [html[0]]
80
  i = 1
81
 
82
  context_closers = {
83
  '{': '}',
84
  '[': ']',
85
+ '"': '"',
86
+ '/': '/' # javascript regex
87
  }
88
 
89
  while i < len(html):
90
  if len(stack) == 0:
91
  break
92
+ if curr_char not in [' ', '\n']:
93
+ last_char = curr_char
94
  curr_char = html[i]
95
  curr_context = stack[-1]
96
 
 
100
  i += 1
101
  continue
102
 
103
+ # Strings and regex expressions require special context handling because they can contain
104
  # context openers *and* closers
105
+ if curr_context in ['"', '/']:
106
+ # If there's a backslash in a string or regex expression, we skip a character
107
  if curr_char == '\\':
108
  i += 2
109
  continue
110
  else:
111
  # Non-string contexts are when we need to look for context openers.
112
  if curr_char in context_closers.keys():
113
+ # Slash starts a regular expression depending on context
114
+ if not (curr_char == '/' and last_char not in ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';']):
115
+ stack.append(curr_char)
116
 
117
  i += 1
118