I am trying to iteratively parse a large JSON file. However, after the first few events ijson raises a JSONError: Additional data
. I have looked into ijson's sourcecode, but fail to understand what the problem is.
Here is a minimal working example, my eventual goal to extract all objects with '.com' in the body.url.
import io
import ijson
fh = io.StringIO('''{"body": {"kids": [487171, 15, 234509, 454410, 82729], "descendants": 15, "url": "http://ycombinator.com", "title": "Y Combinator", "by": "pg", "score": 61, "time": 1160418111, "type": "story", "id": 1}, "source": "firebase", "id": 1, "retrieved_at_ts": 1435938464}
{"body": {"kids": [454411], "descendants": 0, "url": "http://www.paulgraham.com/mit.html", "title": "A Student's Guide to Startups", "by": "phyllis", "score": 16, "time": 1160418628, "type": "story", "id": 2}, "source": "firebase", "id": 2, "retrieved_at_ts": 1435938464}''')
parser = ijson.parse(fh)
for prefix, event, value in parser:
print(prefix, event, value)
In [6]: start_map None
map_key body
body start_map None
body map_key kids
body.kids start_array None
body.kids.item number 487171
body.kids.item number 15
body.kids.item number 234509
body.kids.item number 454410
body.kids.item number 82729
body.kids end_array None
body map_key descendants
body.descendants number 15
body map_key url
body.url string http://ycombinator.com
body map_key title
body.title string Y Combinator
body map_key by
body.by string pg
body map_key score
body.score number 61
body map_key time
body.time number 1160418111
body map_key type
body.type string story
body map_key id
body.id number 1
body end_map None
map_key source
source string firebase
map_key id
id number 1
map_key retrieved_at_ts
retrieved_at_ts number 1435938464
end_map None
---------------------------------------------------------------------------
JSONError Traceback (most recent call last)
<ipython-input-6-db2d9e444fc8> in <module>()
----> 1 import codecs, os;__pyfile = codecs.open('''/tmp/py3985v6f''', encoding='''utf-8''');__code = __pyfile.read().encode('''utf-8''');__pyfile.close();os.remove('''/tmp/py3985v6f''');exec(compile(__code, '''/home/peon/edu/Econ 298 - Second Year Paper/src/data_management/minimal_working_example_ijson_failing.py''', 'exec'));
/home/peon/edu/Econ 298 - Second Year Paper/src/data_management/minimal_working_example_ijson_failing.py in <module>()
9
10 parser = ijson.parse(fh)
---> 11 for prefix, event, value in parser:
12 print(prefix, event, value)
/usr/local/lib/python3.4/dist-packages/ijson/common.py in parse(basic_events)
63 '''
64 path = []
---> 65 for event, value in basic_events:
66 if event == 'map_key':
67 prefix = '.'.join(path[:-1])
/usr/local/lib/python3.4/dist-packages/ijson/backends/python.py in basic_parse(file, buf_size)
190 pass
191 else:
--> 192 raise common.JSONError('Additional data')
193
194
JSONError: Additional data