我正在尝试使用 .NET Core 3.0 中新的 JSON 读取器 System.Text.Json,读取并解析一个大到无法全部装入内存的 JSON 文件。
Microsoft 的示例代码以 ReadOnlySpan<byte> 作为输入:
/// <summary>
/// Walks every JSON token in <paramref name="dataUtf8"/>, treating the span as the
/// complete (final) payload.
/// </summary>
/// <param name="dataUtf8">UTF-8 encoded JSON text to tokenize.</param>
/// <exception cref="FormatException">A number token cannot be read as an Int32.</exception>
/// <exception cref="ArgumentException">A token type outside the handled set is encountered.</exception>
public static void Utf8JsonReaderLoop(ReadOnlySpan<byte> dataUtf8)
{
    var reader = new Utf8JsonReader(dataUtf8, isFinalBlock: true, state: default);

    while (reader.Read())
    {
        ReadOnlySpan<byte> rawValue = reader.ValueSpan;

        switch (reader.TokenType)
        {
            // Structural tokens, property names and nulls need no processing here.
            case JsonTokenType.StartObject:
            case JsonTokenType.EndObject:
            case JsonTokenType.StartArray:
            case JsonTokenType.EndArray:
            case JsonTokenType.PropertyName:
            case JsonTokenType.Null:
                break;

            case JsonTokenType.String:
                string text = reader.GetString();
                break;

            case JsonTokenType.Number:
                if (reader.TryGetInt32(out int number))
                {
                    break;
                }
                throw new FormatException();

            case JsonTokenType.True:
            case JsonTokenType.False:
                bool flag = reader.GetBoolean();
                break;

            default:
                throw new ArgumentException();
        }
    }

    // Shows how the unread remainder and the reader state would be captured
    // to continue with a following block of data.
    dataUtf8 = dataUtf8.Slice((int)reader.BytesConsumed);
    JsonReaderState readerState = reader.CurrentState;
}
我一直没弄明白的是,如何把这段代码实际用于 FileStream,也就是如何把 FileStream 中的数据放进 ReadOnlySpan<byte>。
我尝试使用以下代码读取文件,并通过 ReadAndProcessLargeFile("latest-all.json"); 调用它:
const int megabyte = 1024 * 1024;

/// <summary>
/// Reads <paramref name="theFilename"/> in one-megabyte chunks, starting at the given
/// offset, and passes each chunk to <c>ProcessChunk</c>.
/// </summary>
/// <param name="theFilename">Path of the file to read.</param>
/// <param name="whereToStartReading">Byte offset, from the start of the file, at which reading begins.</param>
public static void ReadAndProcessLargeFile(string theFilename, long whereToStartReading = 0)
{
    using (var input = new FileStream(theFilename, FileMode.Open, FileAccess.Read))
    {
        var chunk = new byte[megabyte];
        input.Seek(whereToStartReading, SeekOrigin.Begin);

        // A read that returns zero bytes means the end of the file was reached.
        int bytesRead;
        while ((bytesRead = input.Read(chunk, 0, megabyte)) > 0)
        {
            ProcessChunk(chunk, bytesRead);
        }
    }
}
/// <summary>
/// Passes the valid part of a freshly read chunk to <c>Utf8JsonReaderLoop</c>.
/// </summary>
/// <param name="buffer">Buffer that was filled by the last stream read.</param>
/// <param name="bytesRead">Number of bytes at the start of <paramref name="buffer"/> that were actually read.</param>
private static void ProcessChunk(byte[] buffer, int bytesRead)
{
    // Only the first bytesRead bytes belong to this chunk; anything after them is
    // stale data left over from an earlier, longer read and must not be parsed.
    var span = new ReadOnlySpan<byte>(buffer, 0, bytesRead);
    Utf8JsonReaderLoop(span);
}
它因错误信息而崩溃
System.Text.Json.JsonReaderException: 'Expected end of string, but instead reached end of data. LineNumber: 8 | BytePositionInLine: 123335.'
作为参考,这是我使用Newtonsoft.Json的工作代码。
// Reference implementation based on Newtonsoft.Json: scan the token stream and
// deserialize a value each time an object start token is reached.
dynamic o;
var serializer = new Newtonsoft.Json.JsonSerializer();
using (FileStream input = File.Open("latest-all.json", FileMode.Open))
using (StreamReader textReader = new StreamReader(input))
using (JsonReader jsonReader = new JsonTextReader(textReader))
{
    while (jsonReader.Read())
    {
        // Everything except the start of an object is skipped.
        if (jsonReader.TokenType != JsonToken.StartObject)
        {
            continue;
        }

        o = serializer.Deserialize(jsonReader);
    }
}
更新2019-10-13:
重写Utf8JsonStreamReader以在内部使用ReadOnlySequences,并为JsonSerializer.Deserialize方法添加了包装器。
为此,我围绕Utf8JsonReader创建了一个包装器:
/// <summary>
/// Wrapper around <see cref="Utf8JsonReader"/> that pulls its input from a
/// <see cref="Stream"/> in pooled buffer segments, so the JSON document does not have to be
/// loaded into memory as a whole.
/// </summary>
public ref struct Utf8JsonStreamReader
{
    private readonly Stream _stream;
    private readonly int _bufferSize;

    // (_firstSegment, _firstSegmentStartIndex) .. (_lastSegment, _lastSegmentEndIndex) is the
    // window of data the current _jsonReader was created over.
    private SequenceSegment? _firstSegment;
    private int _firstSegmentStartIndex;
    private SequenceSegment? _lastSegment;
    private int _lastSegmentEndIndex;

    private Utf8JsonReader _jsonReader;

    // While true, segments that the reader has moved past are skipped but not disposed,
    // because Deserialize still needs their bytes.
    private bool _keepBuffers;
    private bool _isFinalBlock;

    /// <summary>Creates a reader over <paramref name="stream"/>.</summary>
    /// <param name="stream">Stream providing UTF-8 encoded JSON.</param>
    /// <param name="bufferSize">Size requested from the memory pool for every buffer segment.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public Utf8JsonStreamReader(Stream stream, int bufferSize)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
        _bufferSize = bufferSize;

        _firstSegment = null;
        _firstSegmentStartIndex = 0;
        _lastSegment = null;
        _lastSegmentEndIndex = -1;

        _jsonReader = default;
        _keepBuffers = false;
        _isFinalBlock = false;
    }

    /// <summary>Advances to the next JSON token, loading more data from the stream when needed.</summary>
    /// <returns><c>true</c> when a token was read; <c>false</c> when the final block holds no further token.</returns>
    public bool Read()
    {
        // A read can fail because the buffered data ends in the middle of a token;
        // retry with additional buffer segments until the final block has been seen.
        while (!_jsonReader.Read())
        {
            if (_isFinalBlock)
            {
                return false;
            }

            MoveNext();
        }

        return true;
    }

    /// <summary>
    /// Appends one more buffer segment filled from the stream and re-creates the inner
    /// reader over the data that has not been consumed yet.
    /// </summary>
    private void MoveNext()
    {
        var firstSegment = _firstSegment;
        _firstSegmentStartIndex += (int)_jsonReader.BytesConsumed;

        // Step over segments that are fully consumed. The start index handed to
        // ReadOnlySequence must lie inside the start segment, so this has to happen even
        // while buffers are retained; in that case the segments are just not disposed.
        while (firstSegment != null && firstSegment.Memory.Length <= _firstSegmentStartIndex)
        {
            _firstSegmentStartIndex -= firstSegment.Memory.Length;
            var next = (SequenceSegment?)firstSegment.Next;
            if (!_keepBuffers)
            {
                firstSegment.Dispose();
            }

            firstSegment = next;
        }

        // Create the new segment and always link it to its predecessor, so that a
        // retained head segment can still reach it through Next.
        var newSegment = new SequenceSegment(_bufferSize, _lastSegment);
        _lastSegment?.SetNext(newSegment);
        _lastSegment = newSegment;

        if (firstSegment != null)
        {
            _firstSegment = firstSegment;
        }
        else
        {
            _firstSegment = newSegment;
            _firstSegmentStartIndex = 0;
        }

        // Fill the segment completely. Stream.Read may return fewer bytes than requested
        // before the end of the stream; only a read of zero bytes signals the end.
        Span<byte> target = newSegment.Buffer.Memory.Span;
        int filled = 0;
        while (filled < target.Length)
        {
            int read = _stream.Read(target.Slice(filled));
            if (read == 0)
            {
                break;
            }

            filled += read;
        }

        _lastSegmentEndIndex = filled;
        _isFinalBlock = filled < target.Length;

        _jsonReader = new Utf8JsonReader(
            new ReadOnlySequence<byte>(_firstSegment, _firstSegmentStartIndex, _lastSegment, _lastSegmentEndIndex),
            _isFinalBlock,
            _jsonReader.CurrentState);
    }

    /// <summary>
    /// Deserializes the value that starts at the current token into <typeparamref name="T"/>.
    /// </summary>
    /// <param name="options">Serializer options passed on to <see cref="JsonSerializer"/>.</param>
    /// <returns>The deserialized value.</returns>
    public T Deserialize<T>(JsonSerializerOptions? options = null)
    {
        // JsonSerializer.Deserialize can read only a single value. The bytes of the value are
        // therefore located first and handed to a separate Utf8JsonReader. This costs one
        // additional pass through the data.
        var tokenStartIndex = _jsonReader.TokenStartIndex;
        var retainedSegment = _firstSegment;
        var retainedStartIndex = _firstSegmentStartIndex;

        T result;
        try
        {
            // Read until the end of the current object/array while keeping all buffers alive.
            _keepBuffers = true;

            int depth = 0;
            if (TokenType == JsonTokenType.StartObject || TokenType == JsonTokenType.StartArray)
            {
                depth++;
            }

            while (depth > 0 && Read())
            {
                if (TokenType == JsonTokenType.StartObject || TokenType == JsonTokenType.StartArray)
                {
                    depth++;
                }
                else if (TokenType == JsonTokenType.EndObject || TokenType == JsonTokenType.EndArray)
                {
                    depth--;
                }
            }

            // End of the value found: slice its bytes out of the retained data.
            var valueBytes = new ReadOnlySequence<byte>(retainedSegment!, retainedStartIndex, _lastSegment!, _lastSegmentEndIndex)
                .Slice(tokenStartIndex, _jsonReader.Position);
            var newJsonReader = new Utf8JsonReader(valueBytes, true, default);

            result = JsonSerializer.Deserialize<T>(ref newJsonReader, options);
        }
        finally
        {
            _keepBuffers = false;
            ReleaseConsumedSegments(retainedSegment);
        }

        return result;
    }

    /// <summary>
    /// Disposes the segments that were kept only for <see cref="Deserialize{T}"/> and the
    /// segments the reader has completely consumed.
    /// </summary>
    private void ReleaseConsumedSegments(SequenceSegment? retainedSegment)
    {
        // Segments in front of the reader's window were retained only for the deserializer.
        var segment = retainedSegment;
        while (segment != null && segment != _firstSegment)
        {
            var next = (SequenceSegment?)segment.Next;
            segment.Dispose();
            segment = next;
        }

        // Segments at the start of the window that lie entirely before the consumed position.
        var head = _firstSegment;
        var headStartIndex = _firstSegmentStartIndex + (int)_jsonReader.BytesConsumed;
        while (head != null && head.Memory.Length < headStartIndex)
        {
            headStartIndex -= head.Memory.Length;
            var next = (SequenceSegment?)head.Next;
            head.Dispose();
            head = next;
        }

        if (head != _firstSegment)
        {
            // The window start moved, so the reader has to be rebuilt over the remaining data.
            _firstSegment = head;
            _firstSegmentStartIndex = headStartIndex;
            _jsonReader = new Utf8JsonReader(
                new ReadOnlySequence<byte>(_firstSegment!, _firstSegmentStartIndex, _lastSegment!, _lastSegmentEndIndex),
                _isFinalBlock,
                _jsonReader.CurrentState);
        }
    }

    /// <summary>Returns the buffers that are still held to the memory pool.</summary>
    public void Dispose() => _lastSegment?.Dispose();

    // The members below forward to the wrapped Utf8JsonReader.
    public int CurrentDepth => _jsonReader.CurrentDepth;
    public bool HasValueSequence => _jsonReader.HasValueSequence;
    public long TokenStartIndex => _jsonReader.TokenStartIndex;
    public JsonTokenType TokenType => _jsonReader.TokenType;
    public ReadOnlySequence<byte> ValueSequence => _jsonReader.ValueSequence;
    public ReadOnlySpan<byte> ValueSpan => _jsonReader.ValueSpan;

    public bool GetBoolean() => _jsonReader.GetBoolean();
    public byte GetByte() => _jsonReader.GetByte();
    public byte[] GetBytesFromBase64() => _jsonReader.GetBytesFromBase64();
    public string GetComment() => _jsonReader.GetComment();
    public DateTime GetDateTime() => _jsonReader.GetDateTime();
    public DateTimeOffset GetDateTimeOffset() => _jsonReader.GetDateTimeOffset();
    public decimal GetDecimal() => _jsonReader.GetDecimal();
    public double GetDouble() => _jsonReader.GetDouble();
    public Guid GetGuid() => _jsonReader.GetGuid();
    public short GetInt16() => _jsonReader.GetInt16();
    public int GetInt32() => _jsonReader.GetInt32();
    public long GetInt64() => _jsonReader.GetInt64();
    public sbyte GetSByte() => _jsonReader.GetSByte();
    public float GetSingle() => _jsonReader.GetSingle();
    public string GetString() => _jsonReader.GetString();
    public ushort GetUInt16() => _jsonReader.GetUInt16();
    public uint GetUInt32() => _jsonReader.GetUInt32();
    public ulong GetUInt64() => _jsonReader.GetUInt64();

    public bool TryGetByte(out byte value) => _jsonReader.TryGetByte(out value);

    // Misnamed overload (it reads a byte, not a decimal); kept so existing callers still
    // compile. New code should call TryGetByte.
    public bool TryGetDecimal(out byte value) => _jsonReader.TryGetByte(out value);
    public bool TryGetBytesFromBase64(out byte[] value) => _jsonReader.TryGetBytesFromBase64(out value);
    public bool TryGetDateTime(out DateTime value) => _jsonReader.TryGetDateTime(out value);
    public bool TryGetDateTimeOffset(out DateTimeOffset value) => _jsonReader.TryGetDateTimeOffset(out value);
    public bool TryGetDecimal(out decimal value) => _jsonReader.TryGetDecimal(out value);
    public bool TryGetDouble(out double value) => _jsonReader.TryGetDouble(out value);
    public bool TryGetGuid(out Guid value) => _jsonReader.TryGetGuid(out value);
    public bool TryGetInt16(out short value) => _jsonReader.TryGetInt16(out value);
    public bool TryGetInt32(out int value) => _jsonReader.TryGetInt32(out value);
    public bool TryGetInt64(out long value) => _jsonReader.TryGetInt64(out value);
    public bool TryGetSByte(out sbyte value) => _jsonReader.TryGetSByte(out value);
    public bool TryGetSingle(out float value) => _jsonReader.TryGetSingle(out value);
    public bool TryGetUInt16(out ushort value) => _jsonReader.TryGetUInt16(out value);
    public bool TryGetUInt32(out uint value) => _jsonReader.TryGetUInt32(out value);
    public bool TryGetUInt64(out ulong value) => _jsonReader.TryGetUInt64(out value);

    /// <summary>One pooled buffer in the linked list of segments that backs the reader.</summary>
    private sealed class SequenceSegment : ReadOnlySequenceSegment<byte>, IDisposable
    {
        internal IMemoryOwner<byte> Buffer { get; }
        internal SequenceSegment? Previous { get; set; }
        private bool _disposed;

        public SequenceSegment(int size, SequenceSegment? previous)
        {
            Buffer = MemoryPool<byte>.Shared.Rent(size);
            Previous = previous;

            Memory = Buffer.Memory;
            // Continue the running index of the predecessor; the first segment starts at 0.
            RunningIndex = previous?.RunningIndex + previous?.Memory.Length ?? 0;
        }

        public void SetNext(SequenceSegment next) => Next = next;

        /// <summary>Disposes this segment and every not yet disposed segment before it.</summary>
        public void Dispose()
        {
            // Walk backwards iteratively instead of recursing, and drop the Previous links
            // so disposed segments do not stay reachable through the chain.
            var segment = this;
            while (segment != null && !segment._disposed)
            {
                segment._disposed = true;
                segment.Buffer.Dispose();

                var previous = segment.Previous;
                segment.Previous = null;
                segment = previous;
            }
        }
    }
}
您可以使用它代替Utf8JsonReader,或将json反序列化为类型化的对象(作为System.Text.Json.JsonSerializer.Deserialize的包装)。
从巨大的JSON数组反序列化对象的用法示例:
// Example: deserialize the elements of a huge JSON array one at a time.
using var input = new FileStream("LargeData.json", FileMode.Open, FileAccess.Read);
using var reader = new Utf8JsonStreamReader(input, 32 * 1024);

reader.Read(); // step onto the start of the array
reader.Read(); // step onto the start of the first object

// Deserialize leaves the reader on the last token of the object it parsed, so the
// loop's iterator step moves on to the first token of the next element.
for (; reader.TokenType != JsonTokenType.EndArray; reader.Read())
{
    var obj = reader.Deserialize<TestData>();
}
反序列化方法从流中读取数据,直到找到当前对象的末尾。然后,它使用读取的数据构造新的Utf8JsonReader并调用JsonSerializer.Deserialize。
其他方法将传递给Utf8JsonReader。
而且,与往常一样,不要忘记在最后释放(Dispose)这些对象。
问题内容: 我知道您可以使用Newtonsoft轻松地做到这一点。但是,当我使用.NET Core 3.0时,我正在尝试使用新方法与JSON文件进行交互,即,并且我拒绝相信我要做的一切都那么困难! 我的应用程序需要列出尚未添加到我的数据库中的用户。为了获取所有用户的完整列表,该应用程序从Web API检索JSON字符串。现在,我需要循环浏览这些用户中的每一个,并检查是否已将它们添加到我的应用程序中
问题内容: 我想使用 GSON* 在 JAVA中 解析此 JSON 文件: *** 但是我不知道如何加入root元素: 描述符 ,之后是 app3 元素,最后是 name 元素。 我遵循了本教程http://www.mkyong.com/java/gson-streaming-to-read-and-write- json/ ,但是它没有显示具有root和childs元素的情况。 问题答案: Im
我学习了这个教程http://www.mkyong.com/java/gson-streaming-to-read-and-write-json/,但它没有显示root和childs元素的情况。
我想使用Typescript解析/处理一个25 MB的JSON文件,并过滤/排序对象。。我写的代码花了几分钟(有时会超时),不确定为什么会发生这种情况,或者是否有其他方法可以提高代码的效率。 注意:代码在一个小文件上工作
问题内容: 我正在尝试使用下面的确切结构来解析JSON文件。 我正在使用以下jQuery函数: 我做错什么了吗? 问题答案: 您没有访问正确的元素。不指向,它指向最外面的元素(是它的属性)。该数组包含在: 进一步说明: 如果仅访问属性一次,则无需创建局部变量(但当然,它可能更易读)。 虽然连续使用分号是没有错的,但这是不必要和令人困惑的(至少这使我感到困惑;)
问题内容: 我有一个非常简单的JSON,其中包含产品评论,例如: 我想使用GSON将其读入我的Java应用程序中。我建立了一个类来保存每个评论的结果: 要读取JSON文件,我的代码是: 使用此代码,我只能在JSON中检索第一个评论,所以我的问题是:如何遍历所有读者并获得下一个评论?我不需要将评论存储在列表中,只需要访问对象一次。任何帮助都超过了欢迎。 问题答案: 您必须获取列表中的全部数据,然后再