diff --git a/dev/convert_to_sharegpt.py b/dev/convert_to_sharegpt.py new file mode 100644 index 0000000..365afce --- /dev/null +++ b/dev/convert_to_sharegpt.py @@ -0,0 +1,24 @@ +import json +import os + +os.chdir(r'C:\tmp') + +input_file = 'identity_conversations.jsonl' +output_file = 'identity_conversations_sharegpt.jsonl' + +count = 0 +with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out: + for line in f_in: + messages = json.loads(line.strip()) + converted = {'conversations': []} + for msg in messages: + role = 'human' if msg['role'] == 'user' else 'gpt' + converted['conversations'].append({ + 'from': role, + 'value': msg['content'] + }) + f_out.write(json.dumps(converted, ensure_ascii=False) + '\n') + count += 1 + +print(f'Converted {count} conversations to ShareGPT format') +print(f'Output saved to: {output_file}') \ No newline at end of file