Merge pull request #1520 from fischcode/Fix-parser-py
Browse files- pytube/parser.py +12 -5
pytube/parser.py
CHANGED
@@ -74,18 +74,23 @@ def find_object_from_startpoint(html, start_point):
|
|
74 |
|
75 |
# First letter MUST be a open brace, so we put that in the stack,
|
76 |
# and skip the first character.
|
|
|
|
|
77 |
stack = [html[0]]
|
78 |
i = 1
|
79 |
|
80 |
context_closers = {
|
81 |
'{': '}',
|
82 |
'[': ']',
|
83 |
-
'"': '"'
|
|
|
84 |
}
|
85 |
|
86 |
while i < len(html):
|
87 |
if len(stack) == 0:
|
88 |
break
|
|
|
|
|
89 |
curr_char = html[i]
|
90 |
curr_context = stack[-1]
|
91 |
|
@@ -95,17 +100,19 @@ def find_object_from_startpoint(html, start_point):
|
|
95 |
i += 1
|
96 |
continue
|
97 |
|
98 |
-
# Strings require special context handling because they can contain
|
99 |
# context openers *and* closers
|
100 |
-
if curr_context == '"':
|
101 |
-
# If there's a backslash in a string, we skip a character
|
102 |
if curr_char == '\\':
|
103 |
i += 2
|
104 |
continue
|
105 |
else:
|
106 |
# Non-string contexts are when we need to look for context openers.
|
107 |
if curr_char in context_closers.keys():
|
108 |
-
stack.append(curr_char)
|
|
|
|
|
109 |
|
110 |
i += 1
|
111 |
|
|
|
74 |
|
75 |
# First letter MUST be a open brace, so we put that in the stack,
|
76 |
# and skip the first character.
|
77 |
+
last_char = '{'
|
78 |
+
curr_char = None
|
79 |
stack = [html[0]]
|
80 |
i = 1
|
81 |
|
82 |
context_closers = {
|
83 |
'{': '}',
|
84 |
'[': ']',
|
85 |
+
'"': '"',
|
86 |
+
'/': '/' # javascript regex
|
87 |
}
|
88 |
|
89 |
while i < len(html):
|
90 |
if len(stack) == 0:
|
91 |
break
|
92 |
+
if curr_char not in [' ', '\n']:
|
93 |
+
last_char = curr_char
|
94 |
curr_char = html[i]
|
95 |
curr_context = stack[-1]
|
96 |
|
|
|
100 |
i += 1
|
101 |
continue
|
102 |
|
103 |
+
# Strings and regex expressions require special context handling because they can contain
|
104 |
# context openers *and* closers
|
105 |
+
if curr_context in ['"', '/']:
|
106 |
+
# If there's a backslash in a string or regex expression, we skip a character
|
107 |
if curr_char == '\\':
|
108 |
i += 2
|
109 |
continue
|
110 |
else:
|
111 |
# Non-string contexts are when we need to look for context openers.
|
112 |
if curr_char in context_closers.keys():
|
113 |
+
# Slash starts a regular expression depending on context
|
114 |
+
if not (curr_char == '/' and last_char not in ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';']):
|
115 |
+
stack.append(curr_char)
|
116 |
|
117 |
i += 1
|
118 |
|